// reddb_server/ai.rs
1//! External AI provider integration primitives.
2//!
3//! This module currently supports OpenAI embeddings and is intended to be
4//! consumed by server handlers and future query/runtime integrations.
5
6use std::io::BufRead;
7use std::time::Duration;
8
9use crate::json::{parse_json, Map, Value as JsonValue};
10use crate::{RedDBError, RedDBResult};
11
12/// Shared HTTP helper for every outbound AI provider call. Centralises
13/// the ureq 3.x builder and error conversion. Returns
14/// `(status, body)` even on 4xx/5xx (via
15/// `http_status_as_error(false)`) so callers can format a
16/// provider-specific error without re-plumbing the 3.x error enum.
17fn http_post_json(
18    url: &str,
19    api_key: &str,
20    extra_headers: &[(&str, &str)],
21    payload: String,
22    read_timeout_secs: u64,
23) -> Result<(u16, String), String> {
24    let agent: ureq::Agent = ureq::Agent::config_builder()
25        .timeout_connect(Some(Duration::from_secs(10)))
26        .timeout_send_request(Some(Duration::from_secs(30)))
27        .timeout_recv_response(Some(Duration::from_secs(read_timeout_secs)))
28        .timeout_recv_body(Some(Duration::from_secs(read_timeout_secs)))
29        .http_status_as_error(false)
30        .build()
31        .into();
32
33    let mut req = agent
34        .post(url)
35        .header("Content-Type", "application/json")
36        .header("Accept", "application/json");
37    for (k, v) in extra_headers {
38        req = req.header(*k, *v);
39    }
40    let trimmed_key = api_key.trim();
41    if !trimmed_key.is_empty() {
42        req = req.header("Authorization", &format!("Bearer {}", trimmed_key));
43    }
44
45    match req.send(payload) {
46        Ok(mut resp) => {
47            let status = resp.status().as_u16();
48            let body = resp
49                .body_mut()
50                .read_to_string()
51                .map_err(|err| format!("failed to read response body: {err}"))?;
52            Ok((status, body))
53        }
54        Err(err) => Err(format!("{err}")),
55    }
56}
57
/// Default OpenAI embedding model used when none is configured.
pub const DEFAULT_OPENAI_EMBEDDING_MODEL: &str = "text-embedding-3-small";
/// Default base URL for OpenAI-compatible endpoints.
pub const DEFAULT_OPENAI_API_BASE: &str = "https://api.openai.com/v1";
/// Default OpenAI chat-completion model for prompt calls.
pub const DEFAULT_OPENAI_PROMPT_MODEL: &str = "gpt-4.1-mini";
/// Default Anthropic messages-API model for prompt calls.
pub const DEFAULT_ANTHROPIC_PROMPT_MODEL: &str = "claude-3-5-haiku-latest";
/// Default base URL for the Anthropic API.
pub const DEFAULT_ANTHROPIC_API_BASE: &str = "https://api.anthropic.com/v1";
/// Default value sent in the `anthropic-version` request header.
pub const DEFAULT_ANTHROPIC_VERSION: &str = "2023-06-01";
64
/// Parameters for an OpenAI-compatible embeddings request.
#[derive(Debug, Clone)]
pub struct OpenAiEmbeddingRequest {
    /// Bearer token; an empty/whitespace key results in no auth header.
    pub api_key: String,
    /// Embedding model identifier (e.g. [`DEFAULT_OPENAI_EMBEDDING_MODEL`]).
    pub model: String,
    /// One or more input texts to embed (must be non-empty).
    pub inputs: Vec<String>,
    /// Optional requested vector dimensionality.
    pub dimensions: Option<usize>,
    /// API base URL; trailing slashes are trimmed before use.
    pub api_base: String,
}
73
/// Parsed result of an OpenAI-compatible embeddings call.
#[derive(Debug, Clone)]
pub struct OpenAiEmbeddingResponse {
    /// Static provider tag.
    pub provider: &'static str,
    /// Model name reported by the provider.
    pub model: String,
    /// One embedding vector per input.
    pub embeddings: Vec<Vec<f32>>,
    /// Input-side token usage, when reported.
    pub prompt_tokens: Option<u64>,
    /// Total token usage, when reported.
    pub total_tokens: Option<u64>,
}
82
/// Parameters for an OpenAI chat-completion prompt request.
#[derive(Debug, Clone)]
pub struct OpenAiPromptRequest {
    /// Bearer token for the provider.
    pub api_key: String,
    /// Chat model identifier (e.g. [`DEFAULT_OPENAI_PROMPT_MODEL`]).
    pub model: String,
    /// User prompt, sent as a single user-role message.
    pub prompt: String,
    /// Optional sampling temperature.
    pub temperature: Option<f32>,
    /// Optional deterministic-sampling seed.
    pub seed: Option<u64>,
    /// Optional cap on generated tokens (serialised as `max_tokens`).
    pub max_output_tokens: Option<usize>,
    /// API base URL; trailing slashes are trimmed before use.
    pub api_base: String,
    /// When true, request a server-sent-events streaming response.
    pub stream: bool,
}
94
/// Parameters for an Anthropic messages-API prompt request.
#[derive(Debug, Clone)]
pub struct AnthropicPromptRequest {
    /// API key sent via the `x-api-key` header; must be non-empty.
    pub api_key: String,
    /// Model identifier (e.g. [`DEFAULT_ANTHROPIC_PROMPT_MODEL`]).
    pub model: String,
    /// User prompt, sent as a single user-role message.
    pub prompt: String,
    /// Optional sampling temperature.
    pub temperature: Option<f32>,
    /// Optional cap on generated tokens; the payload defaults to 512.
    pub max_output_tokens: Option<usize>,
    /// API base URL; trailing slashes are trimmed before use.
    pub api_base: String,
    /// Value for the `anthropic-version` header.
    pub anthropic_version: String,
}
105
/// Provider-agnostic result of a prompt call (OpenAI or Anthropic).
#[derive(Debug, Clone)]
pub struct AiPromptResponse {
    /// Static provider tag ("openai" or "anthropic").
    pub provider: &'static str,
    /// Model name reported by the provider (falls back to the requested one).
    pub model: String,
    /// Full generated text.
    pub output_text: String,
    /// Individual streamed chunks when the response was streamed.
    pub output_chunks: Option<Vec<String>>,
    /// Input-side token usage, when reported.
    pub prompt_tokens: Option<u64>,
    /// Output-side token usage, when reported.
    pub completion_tokens: Option<u64>,
    /// Total token usage (reported, or derived from the two parts).
    pub total_tokens: Option<u64>,
    /// Provider finish/stop reason, when reported.
    pub stop_reason: Option<String>,
}
117
118#[deprecated(
119    since = "1.0.0",
120    note = "use AiBatchClient::embed_batch for embeddings or openai_embeddings_async with AiTransport when token usage metadata is required"
121)]
122pub fn openai_embeddings(request: OpenAiEmbeddingRequest) -> RedDBResult<OpenAiEmbeddingResponse> {
123    if request.model.trim().is_empty() {
124        return Err(RedDBError::Query(
125            "OpenAI embedding model cannot be empty".to_string(),
126        ));
127    }
128    if request.inputs.is_empty() {
129        return Err(RedDBError::Query(
130            "at least one input is required for embeddings".to_string(),
131        ));
132    }
133
134    let url = format!("{}/embeddings", request.api_base.trim_end_matches('/'));
135    let payload =
136        build_openai_embedding_payload(&request.model, &request.inputs, request.dimensions);
137
138    let (status, body) = http_post_json(&url, &request.api_key, &[], payload, 90)
139        .map_err(|err| RedDBError::Query(format!("OpenAI transport error: {err}")))?;
140
141    if !(200..300).contains(&status) {
142        let message = openai_error_message(&body)
143            .unwrap_or_else(|| "OpenAI embeddings request failed".to_string());
144        return Err(RedDBError::Query(format!(
145            "OpenAI embeddings request failed (status {status}): {message}"
146        )));
147    }
148
149    parse_openai_embedding_response(&body)
150}
151
152#[deprecated(since = "1.0.0", note = "use openai_prompt_async with AiTransport")]
153pub fn openai_prompt(request: OpenAiPromptRequest) -> RedDBResult<AiPromptResponse> {
154    if request.model.trim().is_empty() {
155        return Err(RedDBError::Query(
156            "OpenAI prompt model cannot be empty".to_string(),
157        ));
158    }
159    if request.prompt.trim().is_empty() {
160        return Err(RedDBError::Query("prompt cannot be empty".to_string()));
161    }
162
163    let url = format!(
164        "{}/chat/completions",
165        request.api_base.trim_end_matches('/')
166    );
167    let payload = build_openai_prompt_payload(
168        &request.model,
169        &request.prompt,
170        request.temperature,
171        request.seed,
172        request.max_output_tokens,
173        false,
174    );
175
176    let (status, body) = http_post_json(&url, &request.api_key, &[], payload, 120)
177        .map_err(|err| RedDBError::Query(format!("OpenAI transport error: {err}")))?;
178
179    if !(200..300).contains(&status) {
180        let message = openai_error_message(&body)
181            .unwrap_or_else(|| "OpenAI prompt request failed".to_string());
182        return Err(RedDBError::Query(format!(
183            "OpenAI prompt request failed (status {status}): {message}"
184        )));
185    }
186
187    parse_openai_prompt_response(&body, &request.model)
188}
189
190#[deprecated(since = "1.0.0", note = "use anthropic_prompt_async with AiTransport")]
191pub fn anthropic_prompt(request: AnthropicPromptRequest) -> RedDBResult<AiPromptResponse> {
192    if request.api_key.trim().is_empty() {
193        return Err(RedDBError::Query(
194            "Anthropic API key cannot be empty".to_string(),
195        ));
196    }
197    if request.model.trim().is_empty() {
198        return Err(RedDBError::Query(
199            "Anthropic model cannot be empty".to_string(),
200        ));
201    }
202    if request.prompt.trim().is_empty() {
203        return Err(RedDBError::Query("prompt cannot be empty".to_string()));
204    }
205
206    let url = format!("{}/messages", request.api_base.trim_end_matches('/'));
207    let payload = build_anthropic_prompt_payload(
208        &request.model,
209        &request.prompt,
210        request.temperature,
211        request.max_output_tokens,
212    );
213
214    // Anthropic uses its own `x-api-key` header instead of
215    // `Authorization: Bearer`, so skip the shared helper's default
216    // auth header path — we pass an empty API key and set
217    // `x-api-key` via extra_headers instead.
218    let extra = [
219        ("x-api-key", request.api_key.as_str()),
220        ("anthropic-version", request.anthropic_version.as_str()),
221    ];
222    let (status, body) = http_post_json(&url, "", &extra, payload, 120)
223        .map_err(|err| RedDBError::Query(format!("Anthropic transport error: {err}")))?;
224
225    if !(200..300).contains(&status) {
226        let message = anthropic_error_message(&body)
227            .unwrap_or_else(|| "Anthropic prompt request failed".to_string());
228        return Err(RedDBError::Query(format!(
229            "Anthropic prompt request failed (status {status}): {message}"
230        )));
231    }
232
233    parse_anthropic_prompt_response(&body, &request.model)
234}
235
236/// Async OpenAI-compatible embeddings via [`AiTransport`].
237///
238/// Uses the transport's connection pool and retry policy (429/5xx backoff)
239/// instead of the deprecated one-shot blocking path.
240pub async fn openai_embeddings_async(
241    transport: &crate::runtime::ai::transport::AiTransport,
242    request: OpenAiEmbeddingRequest,
243) -> RedDBResult<OpenAiEmbeddingResponse> {
244    if request.model.trim().is_empty() {
245        return Err(RedDBError::Query(
246            "OpenAI embedding model cannot be empty".to_string(),
247        ));
248    }
249    if request.inputs.is_empty() {
250        return Err(RedDBError::Query(
251            "at least one input is required for embeddings".to_string(),
252        ));
253    }
254
255    let url = format!("{}/embeddings", request.api_base.trim_end_matches('/'));
256    let payload =
257        build_openai_embedding_payload(&request.model, &request.inputs, request.dimensions);
258    let mut http_req =
259        crate::runtime::ai::transport::AiHttpRequest::post_json("openai-compatible", url, payload);
260    let trimmed_key = request.api_key.trim();
261    if !trimmed_key.is_empty() {
262        http_req = http_req.header("authorization", format!("Bearer {}", trimmed_key));
263    }
264
265    let response = transport
266        .request(http_req)
267        .await
268        .map_err(|e| RedDBError::Query(e.to_string()))?;
269
270    parse_openai_embedding_response(&response.body)
271}
272
273/// Async OpenAI chat-completion prompt via [`AiTransport`].
274///
275/// Uses the transport's connection pool and retry policy (429/5xx backoff)
276/// instead of the deprecated one-shot blocking path.
277pub async fn openai_prompt_async(
278    transport: &crate::runtime::ai::transport::AiTransport,
279    request: OpenAiPromptRequest,
280) -> RedDBResult<AiPromptResponse> {
281    if request.model.trim().is_empty() {
282        return Err(RedDBError::Query(
283            "OpenAI prompt model cannot be empty".to_string(),
284        ));
285    }
286    if request.prompt.trim().is_empty() {
287        return Err(RedDBError::Query("prompt cannot be empty".to_string()));
288    }
289
290    let url = format!(
291        "{}/chat/completions",
292        request.api_base.trim_end_matches('/')
293    );
294    let payload = build_openai_prompt_payload(
295        &request.model,
296        &request.prompt,
297        request.temperature,
298        request.seed,
299        request.max_output_tokens,
300        request.stream,
301    );
302    let http_req = crate::runtime::ai::transport::AiHttpRequest::post_json("openai", url, payload)
303        .model(request.model.clone())
304        .header("authorization", format!("Bearer {}", request.api_key));
305
306    let response = transport
307        .request(http_req)
308        .await
309        .map_err(|e| RedDBError::Query(e.to_string()))?;
310
311    if request.stream {
312        parse_openai_streaming_prompt_response(&response.body, &request.model)
313    } else {
314        parse_openai_prompt_response(&response.body, &request.model)
315    }
316}
317
/// Blocking OpenAI-compatible streaming prompt.
///
/// This is used by the socket-level `ASK ... STREAM` path so each provider
/// `delta.content` can be forwarded to the HTTP client before the provider
/// body has completed.
///
/// `on_chunk` is invoked once per non-empty content delta, in arrival order;
/// an error returned by the callback aborts the stream and is propagated.
pub fn openai_prompt_streaming(
    request: OpenAiPromptRequest,
    mut on_chunk: impl FnMut(&str) -> RedDBResult<()>,
) -> RedDBResult<AiPromptResponse> {
    // Validate before any network work.
    if request.model.trim().is_empty() {
        return Err(RedDBError::Query(
            "OpenAI prompt model cannot be empty".to_string(),
        ));
    }
    if request.prompt.trim().is_empty() {
        return Err(RedDBError::Query("prompt cannot be empty".to_string()));
    }

    let url = format!(
        "{}/chat/completions",
        request.api_base.trim_end_matches('/')
    );
    // `stream = true` also requests `stream_options.include_usage`, so a
    // late chunk can carry token counts.
    let payload = build_openai_prompt_payload(
        &request.model,
        &request.prompt,
        request.temperature,
        request.seed,
        request.max_output_tokens,
        true,
    );

    // Dedicated agent (not `http_post_json`) because the body must be
    // consumed incrementally rather than read to a single string.
    let agent: ureq::Agent = ureq::Agent::config_builder()
        .timeout_connect(Some(Duration::from_secs(10)))
        .timeout_send_request(Some(Duration::from_secs(30)))
        .timeout_recv_response(Some(Duration::from_secs(120)))
        .timeout_recv_body(Some(Duration::from_secs(120)))
        .http_status_as_error(false)
        .build()
        .into();

    let mut req = agent
        .post(&url)
        .header("content-type", "application/json")
        .header("accept", "text/event-stream");
    // Empty/whitespace key: no auth header (keyless compatible endpoints).
    let trimmed_key = request.api_key.trim();
    if !trimmed_key.is_empty() {
        req = req.header("authorization", &format!("Bearer {}", trimmed_key));
    }

    let mut response = req
        .send(payload)
        .map_err(|err| RedDBError::Query(format!("OpenAI transport error: {err}")))?;
    let status = response.status().as_u16();
    if !(200..300).contains(&status) {
        // On error the body is a plain JSON error document, not SSE.
        let body = response
            .body_mut()
            .read_to_string()
            .unwrap_or_else(|err| format!("failed to read response body: {err}"));
        let message = openai_error_message(&body)
            .unwrap_or_else(|| "OpenAI prompt request failed".to_string());
        return Err(RedDBError::Query(format!(
            "OpenAI prompt request failed (status {status}): {message}"
        )));
    }

    let mut model = request.model;
    let mut chunks = Vec::new();
    let mut prompt_tokens = None;
    let mut completion_tokens = None;
    let mut total_tokens = None;
    let mut stop_reason = None;

    // Consume the SSE stream line by line, reusing a single buffer.
    let mut reader = std::io::BufReader::new(response.body_mut().as_reader());
    let mut line = String::new();
    loop {
        line.clear();
        let read = reader.read_line(&mut line).map_err(|err| {
            RedDBError::Query(format!("failed to read OpenAI streaming response: {err}"))
        })?;
        // 0 bytes read == EOF.
        if read == 0 {
            break;
        }

        // Only `data:` SSE lines carry payloads; skip blanks and other fields.
        let trimmed = line.trim();
        let Some(data) = trimmed.strip_prefix("data:") else {
            continue;
        };
        let data = data.trim();
        if data.is_empty() {
            continue;
        }
        // Sentinel that marks the end of the stream.
        if data == "[DONE]" {
            break;
        }

        let parsed = parse_json(data).map_err(|err| {
            RedDBError::Query(format!(
                "invalid OpenAI streaming prompt JSON response: {err}"
            ))
        })?;
        let json = JsonValue::from(parsed);
        // The provider may echo a more specific model name; keep the latest.
        if let Some(value) = json.get("model").and_then(JsonValue::as_str) {
            model = value.to_string();
        }
        // Usage fields may appear in any chunk; prefer the newest value and
        // fall back to what was seen earlier.
        if let Some(usage) = json.get("usage") {
            prompt_tokens = usage
                .get("prompt_tokens")
                .and_then(JsonValue::as_i64)
                .and_then(|value| u64::try_from(value).ok())
                .or(prompt_tokens);
            completion_tokens = usage
                .get("completion_tokens")
                .and_then(JsonValue::as_i64)
                .and_then(|value| u64::try_from(value).ok())
                .or(completion_tokens);
            total_tokens = usage
                .get("total_tokens")
                .and_then(JsonValue::as_i64)
                .and_then(|value| u64::try_from(value).ok())
                .or(total_tokens);
        }

        let Some(choices) = json.get("choices").and_then(JsonValue::as_array) else {
            continue;
        };
        let Some(first_choice) = choices.first() else {
            continue;
        };
        if let Some(reason) = first_choice
            .get("finish_reason")
            .and_then(JsonValue::as_str)
        {
            stop_reason = Some(reason.to_string());
        }
        // Forward each non-empty delta to the caller before buffering it.
        if let Some(text) = first_choice
            .get("delta")
            .and_then(|delta| delta.get("content"))
            .and_then(JsonValue::as_str)
        {
            if !text.is_empty() {
                on_chunk(text)?;
                chunks.push(text.to_string());
            }
        }
    }

    if chunks.is_empty() {
        return Err(RedDBError::Query(
            "OpenAI streaming prompt response missing text content".to_string(),
        ));
    }

    let output_text = chunks.concat();
    // Derive a total when the provider reported only the two parts.
    let total_tokens = total_tokens.or_else(|| match (prompt_tokens, completion_tokens) {
        (Some(prompt), Some(completion)) => Some(prompt.saturating_add(completion)),
        _ => None,
    });

    Ok(AiPromptResponse {
        provider: "openai",
        model,
        output_text,
        output_chunks: Some(chunks),
        prompt_tokens,
        completion_tokens,
        total_tokens,
        stop_reason,
    })
}
487
488/// Async Anthropic messages-API prompt via [`AiTransport`].
489///
490/// Uses the transport's connection pool and retry policy (429/5xx backoff)
491/// instead of the deprecated one-shot blocking path.
492pub async fn anthropic_prompt_async(
493    transport: &crate::runtime::ai::transport::AiTransport,
494    request: AnthropicPromptRequest,
495) -> RedDBResult<AiPromptResponse> {
496    if request.api_key.trim().is_empty() {
497        return Err(RedDBError::Query(
498            "Anthropic API key cannot be empty".to_string(),
499        ));
500    }
501    if request.model.trim().is_empty() {
502        return Err(RedDBError::Query(
503            "Anthropic model cannot be empty".to_string(),
504        ));
505    }
506    if request.prompt.trim().is_empty() {
507        return Err(RedDBError::Query("prompt cannot be empty".to_string()));
508    }
509
510    let url = format!("{}/messages", request.api_base.trim_end_matches('/'));
511    let payload = build_anthropic_prompt_payload(
512        &request.model,
513        &request.prompt,
514        request.temperature,
515        request.max_output_tokens,
516    );
517    let http_req =
518        crate::runtime::ai::transport::AiHttpRequest::post_json("anthropic", url, payload)
519            .model(request.model.clone())
520            .header("x-api-key", request.api_key)
521            .header("anthropic-version", request.anthropic_version);
522
523    let response = transport
524        .request(http_req)
525        .await
526        .map_err(|e| RedDBError::Query(e.to_string()))?;
527
528    parse_anthropic_prompt_response(&response.body, &request.model)
529}
530
/// Build an OpenAI-compatible embedding request payload.
///
/// Thin wrapper over [`build_openai_embedding_payload`] with no explicit
/// `dimensions` override.
pub(crate) fn build_embedding_payload(model: &str, inputs: &[String]) -> String {
    build_openai_embedding_payload(model, inputs, None)
}
535
536/// Parse an OpenAI-compatible embedding response, returning only the vectors.
537pub(crate) fn parse_embedding_vectors(body: &str) -> Result<Vec<Vec<f32>>, String> {
538    parse_openai_embedding_response(body)
539        .map(|r| r.embeddings)
540        .map_err(|e| e.to_string())
541}
542
/// Parse an OpenAI-compatible embedding response, stringifying the error so
/// callers outside the error-type boundary don't need `RedDBError`.
pub(crate) fn parse_embedding_response(body: &str) -> Result<OpenAiEmbeddingResponse, String> {
    parse_openai_embedding_response(body).map_err(|e| e.to_string())
}
546
547fn build_openai_embedding_payload(
548    model: &str,
549    inputs: &[String],
550    dimensions: Option<usize>,
551) -> String {
552    let mut object = Map::new();
553    object.insert("model".to_string(), JsonValue::String(model.to_string()));
554    if inputs.len() == 1 {
555        object.insert("input".to_string(), JsonValue::String(inputs[0].clone()));
556    } else {
557        object.insert(
558            "input".to_string(),
559            JsonValue::Array(inputs.iter().cloned().map(JsonValue::String).collect()),
560        );
561    }
562    if let Some(dimensions) = dimensions {
563        object.insert(
564            "dimensions".to_string(),
565            JsonValue::Number(dimensions as f64),
566        );
567    }
568    object.insert(
569        "encoding_format".to_string(),
570        JsonValue::String("float".to_string()),
571    );
572    JsonValue::Object(object).to_string_compact()
573}
574
/// Extract a human-readable message from an OpenAI error body, if present.
fn openai_error_message(body: &str) -> Option<String> {
    provider_error_message(body)
}
578
/// Extract a human-readable message from an Anthropic error body, if present.
fn anthropic_error_message(body: &str) -> Option<String> {
    provider_error_message(body)
}
582
583fn provider_error_message(body: &str) -> Option<String> {
584    let parsed = parse_json(body).ok().map(JsonValue::from)?;
585    let error = parsed.get("error")?;
586    if let Some(message) = error.get("message").and_then(JsonValue::as_str) {
587        let trimmed = message.trim();
588        if !trimmed.is_empty() {
589            return Some(trimmed.to_string());
590        }
591    }
592    None
593}
594
595fn build_openai_prompt_payload(
596    model: &str,
597    prompt: &str,
598    temperature: Option<f32>,
599    seed: Option<u64>,
600    max_output_tokens: Option<usize>,
601    stream: bool,
602) -> String {
603    let mut object = Map::new();
604    object.insert("model".to_string(), JsonValue::String(model.to_string()));
605
606    let mut message = Map::new();
607    message.insert("role".to_string(), JsonValue::String("user".to_string()));
608    message.insert("content".to_string(), JsonValue::String(prompt.to_string()));
609    object.insert(
610        "messages".to_string(),
611        JsonValue::Array(vec![JsonValue::Object(message)]),
612    );
613
614    if let Some(temperature) = temperature {
615        object.insert(
616            "temperature".to_string(),
617            JsonValue::Number(temperature as f64),
618        );
619    }
620
621    if let Some(seed) = seed {
622        object.insert("seed".to_string(), JsonValue::Number(seed as f64));
623    }
624
625    if let Some(max_output_tokens) = max_output_tokens {
626        object.insert(
627            "max_tokens".to_string(),
628            JsonValue::Number(max_output_tokens as f64),
629        );
630    }
631
632    if stream {
633        object.insert("stream".to_string(), JsonValue::Bool(true));
634        let mut options = Map::new();
635        options.insert("include_usage".to_string(), JsonValue::Bool(true));
636        object.insert("stream_options".to_string(), JsonValue::Object(options));
637    }
638
639    JsonValue::Object(object).to_string_compact()
640}
641
642fn build_anthropic_prompt_payload(
643    model: &str,
644    prompt: &str,
645    temperature: Option<f32>,
646    max_output_tokens: Option<usize>,
647) -> String {
648    let mut object = Map::new();
649    object.insert("model".to_string(), JsonValue::String(model.to_string()));
650    object.insert(
651        "max_tokens".to_string(),
652        JsonValue::Number(max_output_tokens.unwrap_or(512) as f64),
653    );
654
655    let mut message = Map::new();
656    message.insert("role".to_string(), JsonValue::String("user".to_string()));
657    message.insert("content".to_string(), JsonValue::String(prompt.to_string()));
658    object.insert(
659        "messages".to_string(),
660        JsonValue::Array(vec![JsonValue::Object(message)]),
661    );
662
663    if let Some(temperature) = temperature {
664        object.insert(
665            "temperature".to_string(),
666            JsonValue::Number(temperature as f64),
667        );
668    }
669
670    JsonValue::Object(object).to_string_compact()
671}
672
673fn extract_text_from_parts(parts: &[JsonValue]) -> Option<String> {
674    let mut chunks = Vec::new();
675    for part in parts {
676        if let Some(text) = part.as_str() {
677            let trimmed = text.trim();
678            if !trimmed.is_empty() {
679                chunks.push(trimmed.to_string());
680            }
681            continue;
682        }
683
684        let Some(object) = part.as_object() else {
685            continue;
686        };
687        let Some(text) = object.get("text").and_then(JsonValue::as_str) else {
688            continue;
689        };
690        let trimmed = text.trim();
691        if !trimmed.is_empty() {
692            chunks.push(trimmed.to_string());
693        }
694    }
695
696    if chunks.is_empty() {
697        None
698    } else {
699        Some(chunks.join("\n\n"))
700    }
701}
702
/// Parse a non-streaming OpenAI chat-completion response body.
///
/// Falls back to `requested_model` when the body omits `model`. Errors when
/// the body is not JSON, `choices` is missing or empty, or the first choice
/// carries no usable text content.
fn parse_openai_prompt_response(
    body: &str,
    requested_model: &str,
) -> RedDBResult<AiPromptResponse> {
    let parsed = parse_json(body)
        .map_err(|err| RedDBError::Query(format!("invalid OpenAI prompt JSON response: {err}")))?;
    let json = JsonValue::from(parsed);

    let model = json
        .get("model")
        .and_then(JsonValue::as_str)
        .unwrap_or(requested_model)
        .to_string();

    let Some(choices) = json.get("choices").and_then(JsonValue::as_array) else {
        return Err(RedDBError::Query(
            "OpenAI prompt response missing 'choices' array".to_string(),
        ));
    };
    let Some(first_choice) = choices.first() else {
        return Err(RedDBError::Query(
            "OpenAI prompt response contains no choices".to_string(),
        ));
    };

    // `message.content` may be a plain string or an array of content parts;
    // try the (trimmed, non-empty) string form first, then the parts form.
    let output_text = first_choice
        .get("message")
        .and_then(|message| {
            if let Some(text) = message.get("content").and_then(JsonValue::as_str) {
                let trimmed = text.trim();
                if !trimmed.is_empty() {
                    return Some(trimmed.to_string());
                }
            }
            message
                .get("content")
                .and_then(JsonValue::as_array)
                .and_then(extract_text_from_parts)
        })
        .ok_or_else(|| {
            RedDBError::Query("OpenAI prompt response missing text content".to_string())
        })?;

    // Usage counters are optional; negative values are dropped by try_from.
    let prompt_tokens = json
        .get("usage")
        .and_then(|usage| usage.get("prompt_tokens"))
        .and_then(JsonValue::as_i64)
        .and_then(|value| u64::try_from(value).ok());
    let completion_tokens = json
        .get("usage")
        .and_then(|usage| usage.get("completion_tokens"))
        .and_then(JsonValue::as_i64)
        .and_then(|value| u64::try_from(value).ok());
    // Prefer the provider's reported total; otherwise derive it.
    let total_tokens = json
        .get("usage")
        .and_then(|usage| usage.get("total_tokens"))
        .and_then(JsonValue::as_i64)
        .and_then(|value| u64::try_from(value).ok())
        .or_else(|| match (prompt_tokens, completion_tokens) {
            (Some(prompt), Some(completion)) => Some(prompt.saturating_add(completion)),
            _ => None,
        });

    let stop_reason = first_choice
        .get("finish_reason")
        .and_then(JsonValue::as_str)
        .map(str::to_string);

    Ok(AiPromptResponse {
        provider: "openai",
        model,
        output_text,
        output_chunks: None,
        prompt_tokens,
        completion_tokens,
        total_tokens,
        stop_reason,
    })
}
782
/// Parse a fully-buffered OpenAI SSE streaming response body.
///
/// Unlike [`openai_prompt_streaming`], which consumes the stream
/// incrementally, this walks a complete body line by line: each `data:` line
/// is a JSON chunk; `[DONE]` ends the stream. Errors when no text deltas
/// are found.
fn parse_openai_streaming_prompt_response(
    body: &str,
    requested_model: &str,
) -> RedDBResult<AiPromptResponse> {
    let mut model = requested_model.to_string();
    let mut chunks = Vec::new();
    let mut prompt_tokens = None;
    let mut completion_tokens = None;
    let mut total_tokens = None;
    let mut stop_reason = None;

    for line in body.lines() {
        // Only `data:` SSE lines carry payloads; skip blanks/other fields.
        let line = line.trim();
        let Some(data) = line.strip_prefix("data:") else {
            continue;
        };
        let data = data.trim();
        if data.is_empty() {
            continue;
        }
        // Sentinel that marks the end of the stream.
        if data == "[DONE]" {
            break;
        }

        let parsed = parse_json(data).map_err(|err| {
            RedDBError::Query(format!(
                "invalid OpenAI streaming prompt JSON response: {err}"
            ))
        })?;
        let json = JsonValue::from(parsed);
        // The provider may echo a more specific model name; keep the latest.
        if let Some(value) = json.get("model").and_then(JsonValue::as_str) {
            model = value.to_string();
        }
        // Usage may appear in any chunk; prefer the newest value seen.
        if let Some(usage) = json.get("usage") {
            prompt_tokens = usage
                .get("prompt_tokens")
                .and_then(JsonValue::as_i64)
                .and_then(|value| u64::try_from(value).ok())
                .or(prompt_tokens);
            completion_tokens = usage
                .get("completion_tokens")
                .and_then(JsonValue::as_i64)
                .and_then(|value| u64::try_from(value).ok())
                .or(completion_tokens);
            total_tokens = usage
                .get("total_tokens")
                .and_then(JsonValue::as_i64)
                .and_then(|value| u64::try_from(value).ok())
                .or(total_tokens);
        }

        let Some(choices) = json.get("choices").and_then(JsonValue::as_array) else {
            continue;
        };
        let Some(first_choice) = choices.first() else {
            continue;
        };
        if let Some(reason) = first_choice
            .get("finish_reason")
            .and_then(JsonValue::as_str)
        {
            stop_reason = Some(reason.to_string());
        }
        // Accumulate each non-empty `delta.content` text fragment.
        if let Some(text) = first_choice
            .get("delta")
            .and_then(|delta| delta.get("content"))
            .and_then(JsonValue::as_str)
        {
            if !text.is_empty() {
                chunks.push(text.to_string());
            }
        }
    }

    if chunks.is_empty() {
        return Err(RedDBError::Query(
            "OpenAI streaming prompt response missing text content".to_string(),
        ));
    }

    let output_text = chunks.concat();
    // Derive a total when the provider reported only the two parts.
    let total_tokens = total_tokens.or_else(|| match (prompt_tokens, completion_tokens) {
        (Some(prompt), Some(completion)) => Some(prompt.saturating_add(completion)),
        _ => None,
    });

    Ok(AiPromptResponse {
        provider: "openai",
        model,
        output_text,
        output_chunks: Some(chunks),
        prompt_tokens,
        completion_tokens,
        total_tokens,
        stop_reason,
    })
}
880
/// Parse an Anthropic messages-API response body.
///
/// Falls back to `requested_model` when the body omits `model`. Errors when
/// the body is not JSON, `content` is missing, or no text parts survive.
fn parse_anthropic_prompt_response(
    body: &str,
    requested_model: &str,
) -> RedDBResult<AiPromptResponse> {
    let parsed = parse_json(body).map_err(|err| {
        RedDBError::Query(format!("invalid Anthropic prompt JSON response: {err}"))
    })?;
    let json = JsonValue::from(parsed);

    let model = json
        .get("model")
        .and_then(JsonValue::as_str)
        .unwrap_or(requested_model)
        .to_string();

    let Some(content_parts) = json.get("content").and_then(JsonValue::as_array) else {
        return Err(RedDBError::Query(
            "Anthropic prompt response missing 'content' array".to_string(),
        ));
    };

    let output_text = extract_text_from_parts(content_parts).ok_or_else(|| {
        RedDBError::Query("Anthropic prompt response missing text content".to_string())
    })?;

    // Anthropic reports `input_tokens`/`output_tokens`; map them onto the
    // provider-agnostic prompt/completion fields.
    let prompt_tokens = json
        .get("usage")
        .and_then(|usage| usage.get("input_tokens"))
        .and_then(JsonValue::as_i64)
        .and_then(|value| u64::try_from(value).ok());
    let completion_tokens = json
        .get("usage")
        .and_then(|usage| usage.get("output_tokens"))
        .and_then(JsonValue::as_i64)
        .and_then(|value| u64::try_from(value).ok());
    // No total is reported; derive it only when both parts are present.
    let total_tokens = match (prompt_tokens, completion_tokens) {
        (Some(prompt), Some(completion)) => Some(prompt.saturating_add(completion)),
        _ => None,
    };

    let stop_reason = json
        .get("stop_reason")
        .and_then(JsonValue::as_str)
        .map(str::to_string);

    Ok(AiPromptResponse {
        provider: "anthropic",
        model,
        output_text,
        output_chunks: None,
        prompt_tokens,
        completion_tokens,
        total_tokens,
        stop_reason,
    })
}
937
938fn parse_openai_embedding_response(body: &str) -> RedDBResult<OpenAiEmbeddingResponse> {
939    let parsed = parse_json(body).map_err(|err| {
940        RedDBError::Query(format!("invalid OpenAI embeddings JSON response: {err}"))
941    })?;
942    let json = JsonValue::from(parsed);
943
944    let model = json
945        .get("model")
946        .and_then(JsonValue::as_str)
947        .unwrap_or(DEFAULT_OPENAI_EMBEDDING_MODEL)
948        .to_string();
949
950    let Some(data) = json.get("data").and_then(JsonValue::as_array) else {
951        return Err(RedDBError::Query(
952            "OpenAI response missing 'data' array".to_string(),
953        ));
954    };
955
956    let mut rows: Vec<(usize, Vec<f32>)> = Vec::with_capacity(data.len());
957    for (position, item) in data.iter().enumerate() {
958        let index = item
959            .get("index")
960            .and_then(JsonValue::as_i64)
961            .and_then(|value| usize::try_from(value).ok())
962            .unwrap_or(position);
963
964        let Some(embedding_values) = item.get("embedding").and_then(JsonValue::as_array) else {
965            return Err(RedDBError::Query(
966                "OpenAI response contains item without 'embedding' array".to_string(),
967            ));
968        };
969        if embedding_values.is_empty() {
970            return Err(RedDBError::Query(
971                "OpenAI response contains empty embedding vector".to_string(),
972            ));
973        }
974
975        let mut embedding = Vec::with_capacity(embedding_values.len());
976        for value in embedding_values {
977            let Some(number) = value.as_f64() else {
978                return Err(RedDBError::Query(
979                    "OpenAI response contains non-numeric embedding value".to_string(),
980                ));
981            };
982            embedding.push(number as f32);
983        }
984        rows.push((index, embedding));
985    }
986    rows.sort_by_key(|(index, _)| *index);
987    let embeddings = rows.into_iter().map(|(_, embedding)| embedding).collect();
988
989    let prompt_tokens = json
990        .get("usage")
991        .and_then(|usage| usage.get("prompt_tokens"))
992        .and_then(JsonValue::as_i64)
993        .and_then(|value| u64::try_from(value).ok());
994    let total_tokens = json
995        .get("usage")
996        .and_then(|usage| usage.get("total_tokens"))
997        .and_then(JsonValue::as_i64)
998        .and_then(|value| u64::try_from(value).ok());
999
1000    Ok(OpenAiEmbeddingResponse {
1001        provider: "openai",
1002        model,
1003        embeddings,
1004        prompt_tokens,
1005        total_tokens,
1006    })
1007}
1008
#[cfg(test)]
mod tests {
    use super::*;

    // Embedding vectors must be re-ordered by the 'index' field, not kept
    // in response order (the fixture deliberately lists index 1 first).
    #[test]
    fn parse_openai_embedding_response_extracts_vectors() {
        let body = r#"{
          "object":"list",
          "data":[
            {"object":"embedding","index":1,"embedding":[0.3,0.4]},
            {"object":"embedding","index":0,"embedding":[0.1,0.2]}
          ],
          "model":"text-embedding-3-small",
          "usage":{"prompt_tokens":12,"total_tokens":12}
        }"#;

        let result = parse_openai_embedding_response(body).expect("response should parse");
        assert_eq!(result.provider, "openai");
        assert_eq!(result.model, "text-embedding-3-small");
        assert_eq!(result.embeddings.len(), 2);
        assert_eq!(result.embeddings[0], vec![0.1, 0.2]);
        assert_eq!(result.embeddings[1], vec![0.3, 0.4]);
        assert_eq!(result.prompt_tokens, Some(12));
        assert_eq!(result.total_tokens, Some(12));
    }

    // Error extraction digs into the nested {"error":{"message":...}} shape.
    #[test]
    fn openai_error_message_extracts_nested_message() {
        let body = r#"{"error":{"message":"bad api key","type":"invalid_request_error"}}"#;
        assert_eq!(openai_error_message(body).as_deref(), Some("bad api key"));
    }

    // Non-streaming chat completion: text, usage counters, and finish_reason
    // all flow through to AiPromptResponse.
    #[test]
    fn parse_openai_prompt_response_extracts_text_and_usage() {
        let body = r#"{
          "id":"chatcmpl_1",
          "object":"chat.completion",
          "model":"gpt-4.1-mini",
          "choices":[
            {
              "index":0,
              "finish_reason":"stop",
              "message":{"role":"assistant","content":"Resumo pronto."}
            }
          ],
          "usage":{"prompt_tokens":10,"completion_tokens":4,"total_tokens":14}
        }"#;

        let parsed =
            parse_openai_prompt_response(body, DEFAULT_OPENAI_PROMPT_MODEL).expect("parse");
        assert_eq!(parsed.provider, "openai");
        assert_eq!(parsed.model, "gpt-4.1-mini");
        assert_eq!(parsed.output_text, "Resumo pronto.");
        assert_eq!(parsed.prompt_tokens, Some(10));
        assert_eq!(parsed.completion_tokens, Some(4));
        assert_eq!(parsed.total_tokens, Some(14));
        assert_eq!(parsed.stop_reason.as_deref(), Some("stop"));
    }

    // Anthropic reports no combined total; total_tokens is derived as
    // input_tokens + output_tokens (11 + 5 = 16).
    #[test]
    fn parse_anthropic_prompt_response_extracts_text_and_usage() {
        let body = r#"{
          "id":"msg_1",
          "model":"claude-3-5-haiku-latest",
          "type":"message",
          "content":[{"type":"text","text":"Action complete."}],
          "usage":{"input_tokens":11,"output_tokens":5},
          "stop_reason":"end_turn"
        }"#;

        let parsed =
            parse_anthropic_prompt_response(body, DEFAULT_ANTHROPIC_PROMPT_MODEL).expect("parse");
        assert_eq!(parsed.provider, "anthropic");
        assert_eq!(parsed.model, "claude-3-5-haiku-latest");
        assert_eq!(parsed.output_text, "Action complete.");
        assert_eq!(parsed.prompt_tokens, Some(11));
        assert_eq!(parsed.completion_tokens, Some(5));
        assert_eq!(parsed.total_tokens, Some(16));
        assert_eq!(parsed.stop_reason.as_deref(), Some("end_turn"));
    }

    // When both the vault secret path and the legacy config key resolve,
    // the vault secret wins (kv_getter returns both here).
    #[test]
    fn resolve_api_key_prefers_vault_secret_over_legacy_config() {
        let provider = AiProvider::OpenAi;
        let alias = "vault_unit_alias";
        let secret_path = ai_api_secret_path(&provider, alias);
        let legacy_key = ai_api_legacy_config_key(&provider, alias);

        let resolved = resolve_api_key(&provider, Some(alias), |key| {
            if key == secret_path {
                Ok(Some("vault-key".to_string()))
            } else if key == legacy_key {
                Ok(Some("legacy-key".to_string()))
            } else {
                Ok(None)
            }
        })
        .expect("resolve");

        assert_eq!(resolved, "vault-key");
    }

    // With no alias supplied, the resolver consults the "default" alias path.
    #[test]
    fn resolve_api_key_uses_default_vault_secret_path() {
        let provider = AiProvider::OpenAi;
        let secret_path = ai_api_secret_path(&provider, "default");

        let resolved = resolve_api_key(&provider, None, |key| {
            if key == secret_path {
                Ok(Some("default-vault-key".to_string()))
            } else {
                Ok(None)
            }
        })
        .expect("resolve");

        assert_eq!(resolved, "default-vault-key");
    }

    // temperature/seed/max_tokens are serialized into the payload when Some.
    #[test]
    fn openai_prompt_payload_includes_temperature_and_seed_when_present() {
        let payload = build_openai_prompt_payload(
            "gpt-4.1-mini",
            "hello",
            Some(0.0),
            Some(42),
            Some(128),
            false,
        );
        let parsed = JsonValue::from(parse_json(&payload).expect("valid json"));

        assert_eq!(
            parsed.get("temperature").and_then(JsonValue::as_f64),
            Some(0.0)
        );
        assert_eq!(parsed.get("seed").and_then(JsonValue::as_u64), Some(42));
        assert_eq!(
            parsed.get("max_tokens").and_then(JsonValue::as_u64),
            Some(128)
        );
    }

    // Optional fields (seed, stream) must be absent — not null — when unset.
    #[test]
    fn openai_prompt_payload_omits_seed_when_none() {
        let payload =
            build_openai_prompt_payload("gpt-4.1-mini", "hello", Some(0.0), None, None, false);
        let parsed = JsonValue::from(parse_json(&payload).expect("valid json"));

        assert!(parsed.get("seed").is_none());
        assert!(parsed.get("stream").is_none());
        assert_eq!(
            parsed.get("temperature").and_then(JsonValue::as_f64),
            Some(0.0)
        );
    }

    // stream=true must also request usage accounting via stream_options.
    #[test]
    fn openai_prompt_payload_enables_stream_options() {
        let payload =
            build_openai_prompt_payload("gpt-4.1-mini", "hello", Some(0.0), None, None, true);
        let parsed = JsonValue::from(parse_json(&payload).expect("valid json"));

        assert_eq!(
            parsed.get("stream").and_then(JsonValue::as_bool),
            Some(true)
        );
        assert_eq!(
            parsed
                .get("stream_options")
                .and_then(|value| value.get("include_usage"))
                .and_then(JsonValue::as_bool),
            Some(true)
        );
    }

    // SSE stream parsing: delta chunks concatenate in order, usage comes from
    // the final data event, and the "[DONE]" sentinel is ignored.
    #[test]
    fn openai_streaming_prompt_response_collects_delta_chunks() {
        let body = concat!(
            "data: {\"model\":\"gpt-test\",\"choices\":[{\"delta\":{\"content\":\"login \"},\"finish_reason\":null}]}\n\n",
            "data: {\"model\":\"gpt-test\",\"choices\":[{\"delta\":{\"content\":\"failed\"},\"finish_reason\":null}]}\n\n",
            "data: {\"model\":\"gpt-test\",\"choices\":[{\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":12,\"completion_tokens\":2,\"total_tokens\":14}}\n\n",
            "data: [DONE]\n\n",
        );
        let parsed = parse_openai_streaming_prompt_response(body, "fallback").unwrap();

        assert_eq!(parsed.model, "gpt-test");
        assert_eq!(parsed.output_text, "login failed");
        assert_eq!(
            parsed.output_chunks.as_deref(),
            Some(["login ".to_string(), "failed".to_string()].as_slice())
        );
        assert_eq!(parsed.prompt_tokens, Some(12));
        assert_eq!(parsed.completion_tokens, Some(2));
        assert_eq!(parsed.total_tokens, Some(14));
        assert_eq!(parsed.stop_reason.as_deref(), Some("stop"));
    }

    // Request validation: whitespace-only model is rejected before any I/O.
    #[tokio::test]
    async fn openai_prompt_async_rejects_empty_model() {
        let transport = crate::runtime::ai::transport::AiTransport::new(Default::default());
        let request = OpenAiPromptRequest {
            api_key: "key".to_string(),
            model: "  ".to_string(),
            prompt: "hello".to_string(),
            temperature: None,
            seed: None,
            max_output_tokens: None,
            api_base: "https://api.openai.com/v1".to_string(),
            stream: false,
        };
        let err = openai_prompt_async(&transport, request).await.unwrap_err();
        assert!(err.to_string().contains("model cannot be empty"));
    }

    // Request validation: empty prompt is rejected before any I/O.
    #[tokio::test]
    async fn openai_prompt_async_rejects_empty_prompt() {
        let transport = crate::runtime::ai::transport::AiTransport::new(Default::default());
        let request = OpenAiPromptRequest {
            api_key: "key".to_string(),
            model: "gpt-4.1-mini".to_string(),
            prompt: "".to_string(),
            temperature: None,
            seed: None,
            max_output_tokens: None,
            api_base: "https://api.openai.com/v1".to_string(),
            stream: false,
        };
        let err = openai_prompt_async(&transport, request).await.unwrap_err();
        assert!(err.to_string().contains("prompt cannot be empty"));
    }

    // Request validation: whitespace-only Anthropic API key is rejected.
    #[tokio::test]
    async fn anthropic_prompt_async_rejects_empty_api_key() {
        let transport = crate::runtime::ai::transport::AiTransport::new(Default::default());
        let request = AnthropicPromptRequest {
            api_key: "  ".to_string(),
            model: "claude-3-5-haiku-latest".to_string(),
            prompt: "hello".to_string(),
            temperature: None,
            max_output_tokens: None,
            api_base: "https://api.anthropic.com/v1".to_string(),
            anthropic_version: DEFAULT_ANTHROPIC_VERSION.to_string(),
        };
        let err = anthropic_prompt_async(&transport, request)
            .await
            .unwrap_err();
        assert!(err.to_string().contains("API key cannot be empty"));
    }
}
1258
1259// ============================================================================
1260// Provider & Credential Resolution (shared between HTTP, gRPC, and runtime)
1261// ============================================================================
1262
/// AI provider identifier.
///
/// Built-in variants cover the providers with hard-coded defaults below;
/// `Custom` carries a user-supplied OpenAI-compatible base URL (see
/// `parse_provider`, which only accepts http(s) URLs as custom providers).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AiProvider {
    OpenAi,
    Anthropic,
    Groq,
    OpenRouter,
    Together,
    Venice,
    // Ollama and Local are treated as not requiring an API key
    // (see `requires_api_key`).
    Ollama,
    DeepSeek,
    HuggingFace,
    Local,
    /// Custom OpenAI-compatible endpoint; the payload doubles as both the
    /// provider token and the default API base URL.
    Custom(String),
}
1278
impl AiProvider {
    /// Canonical lowercase token used in env-var names, KV key paths, and
    /// user-facing messages. For `Custom`, the stored string itself.
    pub fn token(&self) -> &str {
        match self {
            Self::OpenAi => "openai",
            Self::Anthropic => "anthropic",
            Self::Groq => "groq",
            Self::OpenRouter => "openrouter",
            Self::Together => "together",
            Self::Venice => "venice",
            Self::Ollama => "ollama",
            Self::DeepSeek => "deepseek",
            Self::HuggingFace => "huggingface",
            Self::Local => "local",
            Self::Custom(name) => name.as_str(),
        }
    }

    /// Built-in default chat/prompt model for this provider. Custom endpoints
    /// fall back to the OpenAI default model name.
    pub fn default_prompt_model(&self) -> &str {
        match self {
            Self::OpenAi => DEFAULT_OPENAI_PROMPT_MODEL,
            Self::Anthropic => DEFAULT_ANTHROPIC_PROMPT_MODEL,
            Self::Groq => "llama-3.3-70b-versatile",
            Self::OpenRouter => "auto",
            Self::Together => "meta-llama/Meta-Llama-3-8B-Instruct",
            Self::Venice => "llama-3.3-70b",
            Self::Ollama => "llama3",
            Self::DeepSeek => "deepseek-chat",
            Self::HuggingFace => "mistralai/Mistral-7B-Instruct-v0.3",
            Self::Local => "sentence-transformers/all-MiniLM-L6-v2",
            Self::Custom(_) => DEFAULT_OPENAI_PROMPT_MODEL,
        }
    }

    /// Env var that overrides the default prompt model,
    /// e.g. `REDDB_OPENAI_PROMPT_MODEL`.
    pub fn prompt_model_env_name(&self) -> String {
        format!("REDDB_{}_PROMPT_MODEL", self.token().to_ascii_uppercase())
    }

    /// Built-in default embedding model for this provider.
    pub fn default_embedding_model(&self) -> &str {
        match self {
            Self::Ollama => "nomic-embed-text",
            Self::HuggingFace | Self::Local => "sentence-transformers/all-MiniLM-L6-v2",
            _ => DEFAULT_OPENAI_EMBEDDING_MODEL,
        }
    }

    /// Built-in API base URL. For `Custom`, the stored string; for `Local`,
    /// the sentinel "local" (no HTTP endpoint).
    pub fn default_api_base(&self) -> &str {
        match self {
            Self::OpenAi => DEFAULT_OPENAI_API_BASE,
            Self::Anthropic => DEFAULT_ANTHROPIC_API_BASE,
            Self::Groq => "https://api.groq.com/openai/v1",
            Self::OpenRouter => "https://openrouter.ai/api/v1",
            Self::Together => "https://api.together.xyz/v1",
            Self::Venice => "https://api.venice.ai/api/v1",
            Self::Ollama => "http://localhost:11434/v1",
            Self::DeepSeek => "https://api.deepseek.com/v1",
            Self::HuggingFace => "https://api-inference.huggingface.co",
            Self::Local => "local",
            Self::Custom(base) => base.as_str(),
        }
    }

    /// Env var that overrides the API base, e.g. `REDDB_OPENAI_API_BASE`.
    pub fn api_base_env_name(&self) -> String {
        format!("REDDB_{}_API_BASE", self.token().to_ascii_uppercase())
    }

    /// Env var holding the provider's default API key,
    /// e.g. `REDDB_OPENAI_API_KEY`.
    pub fn default_key_env_name(&self) -> String {
        format!("REDDB_{}_API_KEY", self.token().to_ascii_uppercase())
    }

    /// Env var holding the API key for a specific credential alias,
    /// e.g. `REDDB_OPENAI_API_KEY_MY_ALIAS` (alias is normalized to an
    /// uppercase underscore token).
    pub fn alias_key_env_name(&self, alias: &str) -> String {
        let normalized = normalize_alias_token(alias);
        format!(
            "REDDB_{}_API_KEY_{normalized}",
            self.token().to_ascii_uppercase()
        )
    }

    /// Resolve the API base: env-var override (non-blank) first, otherwise
    /// the built-in default.
    pub fn resolve_api_base(&self) -> String {
        if let Ok(value) = std::env::var(self.api_base_env_name()) {
            let value = value.trim().to_string();
            if !value.is_empty() {
                return value;
            }
        }
        self.default_api_base().to_string()
    }

    /// Resolve API base URL checking KV store too (for custom base_url config).
    /// Precedence: env var, then `red.config.ai.{provider}.{alias}.base_url`,
    /// then the built-in default. KV errors are treated as "not configured".
    pub fn resolve_api_base_with_kv<F>(&self, alias: &str, kv_getter: &F) -> String
    where
        F: Fn(&str) -> crate::RedDBResult<Option<String>>,
    {
        // 1. Env var
        if let Ok(value) = std::env::var(self.api_base_env_name()) {
            let value = value.trim().to_string();
            if !value.is_empty() {
                return value;
            }
        }
        // 2. KV store: {provider}/{alias}/base_url
        let kv_key = format!("red.config.ai.{}.{alias}.base_url", self.token());
        if let Ok(Some(value)) = kv_getter(&kv_key) {
            let value = value.trim().to_string();
            if !value.is_empty() {
                return value;
            }
        }
        self.default_api_base().to_string()
    }

    /// Whether this provider uses the OpenAI-compatible API format.
    pub fn is_openai_compatible(&self) -> bool {
        matches!(
            self,
            Self::OpenAi
                | Self::Groq
                | Self::OpenRouter
                | Self::Together
                | Self::Venice
                | Self::Ollama
                | Self::DeepSeek
                | Self::Custom(_)
        )
    }

    /// Whether this provider requires an API key (Ollama/Local don't).
    pub fn requires_api_key(&self) -> bool {
        !matches!(self, Self::Ollama | Self::Local)
    }
}
1409
1410/// Parse a provider string into AiProvider.
1411pub fn parse_provider(name: &str) -> crate::RedDBResult<AiProvider> {
1412    match name.trim().to_ascii_lowercase().as_str() {
1413        "openai" => Ok(AiProvider::OpenAi),
1414        "anthropic" => Ok(AiProvider::Anthropic),
1415        "groq" => Ok(AiProvider::Groq),
1416        "openrouter" | "open_router" => Ok(AiProvider::OpenRouter),
1417        "together" => Ok(AiProvider::Together),
1418        "venice" => Ok(AiProvider::Venice),
1419        "ollama" => Ok(AiProvider::Ollama),
1420        "deepseek" | "deep_seek" => Ok(AiProvider::DeepSeek),
1421        "huggingface" | "hf" => Ok(AiProvider::HuggingFace),
1422        "local" => Ok(AiProvider::Local),
1423        other => {
1424            // Treat as custom provider if it looks like a URL
1425            if other.starts_with("http://") || other.starts_with("https://") {
1426                Ok(AiProvider::Custom(other.to_string()))
1427            } else {
1428                Err(crate::RedDBError::Query(format!(
1429                    "unsupported AI provider '{other}'; expected: openai, anthropic, groq, \
1430                     openrouter, together, venice, ollama, deepseek, huggingface, local"
1431                )))
1432            }
1433        }
1434    }
1435}
1436
1437/// Resolve the default AI provider. Checks:
1438/// 1. `REDDB_AI_PROVIDER` env var
1439/// 2. `red_config` KV key `red.config.ai.default.provider`
1440/// 3. Falls back to OpenAI
1441pub fn resolve_default_provider<F>(kv_getter: &F) -> AiProvider
1442where
1443    F: Fn(&str) -> crate::RedDBResult<Option<String>>,
1444{
1445    // 1. Env var
1446    if let Ok(value) = std::env::var("REDDB_AI_PROVIDER") {
1447        let value = value.trim().to_string();
1448        if !value.is_empty() {
1449            if let Ok(provider) = parse_provider(&value) {
1450                return provider;
1451            }
1452        }
1453    }
1454    // 2. KV store
1455    if let Ok(Some(value)) = kv_getter("red.config.ai.default.provider") {
1456        let value = value.trim().to_string();
1457        if !value.is_empty() {
1458            if let Ok(provider) = parse_provider(&value) {
1459                return provider;
1460            }
1461        }
1462    }
1463    AiProvider::OpenAi
1464}
1465
1466/// Resolve the default AI model. Checks:
1467/// 1. `REDDB_AI_MODEL` env var
1468/// 2. `red_config` KV key `red.config.ai.default.model`
1469/// 3. Falls back to provider's default
1470pub fn resolve_default_model<F>(provider: &AiProvider, kv_getter: &F) -> String
1471where
1472    F: Fn(&str) -> crate::RedDBResult<Option<String>>,
1473{
1474    // 1. Env var
1475    if let Ok(value) = std::env::var("REDDB_AI_MODEL") {
1476        let value = value.trim().to_string();
1477        if !value.is_empty() {
1478            return value;
1479        }
1480    }
1481    // 2. Provider-specific env var
1482    if let Ok(value) = std::env::var(provider.prompt_model_env_name()) {
1483        let value = value.trim().to_string();
1484        if !value.is_empty() {
1485            return value;
1486        }
1487    }
1488    // 3. KV store
1489    if let Ok(Some(value)) = kv_getter("red.config.ai.default.model") {
1490        let value = value.trim().to_string();
1491        if !value.is_empty() {
1492            return value;
1493        }
1494    }
1495    provider.default_prompt_model().to_string()
1496}
1497
1498/// Resolve default provider + model from runtime KV store.
1499pub fn resolve_defaults_from_runtime(
1500    runtime: &crate::runtime::RedDBRuntime,
1501) -> (AiProvider, String) {
1502    use crate::application::ports::RuntimeEntityPort;
1503    let kv_getter = |key: &str| -> crate::RedDBResult<Option<String>> {
1504        match runtime.get_kv("red_config", key)? {
1505            Some((crate::storage::schema::Value::Text(s), _)) => Ok(Some(s.to_string())),
1506            _ => Ok(None),
1507        }
1508    };
1509    let provider = resolve_default_provider(&kv_getter);
1510    let model = resolve_default_model(&provider, &kv_getter);
1511    (provider, model)
1512}
1513
1514/// Resolve default provider + model via RuntimeEntityPort trait (for use in QueryUseCases).
1515pub fn resolve_defaults_from_runtime_port<
1516    P: crate::application::ports::RuntimeEntityPort + ?Sized,
1517>(
1518    runtime: &P,
1519) -> (AiProvider, String) {
1520    let kv_getter = |key: &str| -> crate::RedDBResult<Option<String>> {
1521        match runtime.get_kv("red_config", key)? {
1522            Some((crate::storage::schema::Value::Text(s), _)) => Ok(Some(s.to_string())),
1523            _ => Ok(None),
1524        }
1525    };
1526    let provider = resolve_default_provider(&kv_getter);
1527    let model = resolve_default_model(&provider, &kv_getter);
1528    (provider, model)
1529}
1530
1531/// Resolve an API key for a provider. Uses the chain:
1532/// 1. Environment variable with alias: `REDDB_OPENAI_API_KEY_{ALIAS}`
1533/// 2. Vault secret lookup via `kv_getter` closure
1534/// 3. Legacy KV store lookup via `kv_getter` closure
1535/// 4. Default environment variable: `REDDB_OPENAI_API_KEY`
1536///
1537/// `kv_getter` receives either a `red.secret.*` path or a legacy `red.config.*`
1538/// key and returns the value if found.
1539pub fn resolve_api_key<F>(
1540    provider: &AiProvider,
1541    credential_alias: Option<&str>,
1542    kv_getter: F,
1543) -> crate::RedDBResult<String>
1544where
1545    F: Fn(&str) -> crate::RedDBResult<Option<String>>,
1546{
1547    // Providers that don't require API keys
1548    if !provider.requires_api_key() {
1549        // Still try to find a key (user may have one for auth'd Ollama)
1550        if let Ok(value) = std::env::var(provider.default_key_env_name()) {
1551            let value = value.trim().to_string();
1552            if !value.is_empty() {
1553                return Ok(value);
1554            }
1555        }
1556        return Ok(String::new());
1557    }
1558
1559    if let Some(alias) = credential_alias.map(str::trim).filter(|a| !a.is_empty()) {
1560        // Try env var with alias
1561        if let Some(key) = resolve_key_from_env_alias(provider, alias) {
1562            return Ok(key);
1563        }
1564        if let Some(key) = kv_getter(&ai_api_secret_path(provider, alias))? {
1565            if !key.trim().is_empty() {
1566                return Ok(key);
1567            }
1568        }
1569        if let Some(secret_ref) = kv_getter(&ai_api_secret_ref_config_key(provider, alias))? {
1570            if let Some(key) = kv_getter(secret_ref.trim())? {
1571                if !key.trim().is_empty() {
1572                    return Ok(key);
1573                }
1574            }
1575        }
1576        let legacy_key = ai_api_legacy_config_key(provider, alias);
1577        if let Some(key) = kv_getter(&legacy_key)? {
1578            if !key.trim().is_empty() {
1579                return Ok(key);
1580            }
1581        }
1582        return Err(crate::RedDBError::Query(format!(
1583            "credential '{alias}' not found for {}. Set env {} or store it in the vault",
1584            provider.token(),
1585            provider.alias_key_env_name(alias)
1586        )));
1587    }
1588
1589    // Default env var
1590    if let Ok(value) = std::env::var(provider.default_key_env_name()) {
1591        let value = value.trim().to_string();
1592        if !value.is_empty() {
1593            return Ok(value);
1594        }
1595    }
1596
1597    if let Some(key) = kv_getter(&ai_api_secret_path(provider, "default"))? {
1598        if !key.trim().is_empty() {
1599            return Ok(key);
1600        }
1601    }
1602    if let Some(secret_ref) = kv_getter(&ai_api_secret_ref_config_key(provider, "default"))? {
1603        if let Some(key) = kv_getter(secret_ref.trim())? {
1604            if !key.trim().is_empty() {
1605                return Ok(key);
1606            }
1607        }
1608    }
1609    if let Some(key) = kv_getter(&ai_api_legacy_config_key(provider, "default"))? {
1610        if !key.trim().is_empty() {
1611            return Ok(key);
1612        }
1613    }
1614
1615    let legacy_short_key = format!("{}/default", provider.token());
1616    if let Some(key) = kv_getter(&legacy_short_key)? {
1617        if !key.trim().is_empty() {
1618            return Ok(key);
1619        }
1620    }
1621
1622    Err(crate::RedDBError::Query(format!(
1623        "missing {} API key. Set {} or provide credential alias",
1624        provider.token(),
1625        provider.default_key_env_name()
1626    )))
1627}
1628
1629pub fn ai_api_secret_path(provider: &AiProvider, alias: &str) -> String {
1630    format!(
1631        "red.secret.ai.{}.{}.api_key",
1632        provider.token(),
1633        normalize_credential_alias_path(alias)
1634    )
1635}
1636
1637pub fn ai_api_secret_ref_config_key(provider: &AiProvider, alias: &str) -> String {
1638    format!(
1639        "red.config.ai.{}.{}.secret_ref",
1640        provider.token(),
1641        normalize_credential_alias_path(alias)
1642    )
1643}
1644
1645pub fn ai_api_legacy_config_key(provider: &AiProvider, alias: &str) -> String {
1646    format!(
1647        "red.config.ai.{}.{}.key",
1648        provider.token(),
1649        normalize_credential_alias_path(alias)
1650    )
1651}
1652
/// Lower-case a credential alias for use inside config/vault key paths,
/// mapping blank (empty or whitespace-only) aliases to the literal "default".
fn normalize_credential_alias_path(alias: &str) -> String {
    match alias.trim() {
        "" => "default".to_string(),
        trimmed => trimmed.to_ascii_lowercase(),
    }
}
1661
1662fn resolve_key_from_env_alias(provider: &AiProvider, alias: &str) -> Option<String> {
1663    let env_name = provider.alias_key_env_name(alias);
1664    std::env::var(env_name)
1665        .ok()
1666        .map(|v| v.trim().to_string())
1667        .filter(|v| !v.is_empty())
1668}
1669
/// Normalise an alias into an env-var token: ASCII alphanumerics are
/// upper-cased, every other character becomes '_', runs of '_' collapse to
/// a single one, and leading/trailing '_' are stripped.
///
/// Single-pass rewrite of the previous `while contains("__") { replace }`
/// fixed-point loop, which re-scanned and re-allocated the whole string per
/// iteration (O(n^2) on separator-heavy input). Output is identical.
fn normalize_alias_token(alias: &str) -> String {
    let mut out = String::with_capacity(alias.len());
    for character in alias.chars() {
        if character.is_ascii_alphanumeric() {
            out.push(character.to_ascii_uppercase());
        } else if !out.is_empty() && !out.ends_with('_') {
            // Emit at most one separator per run, and none at the start.
            out.push('_');
        }
    }
    // A single trailing separator can remain when the alias ends with junk.
    out.truncate(out.trim_end_matches('_').len());
    out
}
1684
1685/// Convenience: resolve API key using a RedDBRuntime's KV store.
1686pub fn resolve_api_key_from_runtime(
1687    provider: &AiProvider,
1688    credential_alias: Option<&str>,
1689    runtime: &crate::runtime::RedDBRuntime,
1690) -> crate::RedDBResult<String> {
1691    use crate::application::ports::RuntimeEntityPort;
1692    resolve_api_key(provider, credential_alias, |kv_key| {
1693        if kv_key.starts_with("red.secret.") {
1694            return Ok(runtime.vault_kv_get(kv_key));
1695        }
1696        match runtime.get_kv("red_config", kv_key)? {
1697            Some((crate::storage::schema::Value::Text(secret), _)) => Ok(Some(secret.to_string())),
1698            Some(_) => Ok(None),
1699            None => Ok(None),
1700        }
1701    })
1702}
1703
1704// ============================================================================
1705// HuggingFace Inference API
1706// ============================================================================
1707
1708/// Generate embeddings via HuggingFace Inference API.
1709pub fn huggingface_embeddings(
1710    api_key: &str,
1711    model: &str,
1712    inputs: &[String],
1713    api_base: &str,
1714) -> crate::RedDBResult<OpenAiEmbeddingResponse> {
1715    let url = format!("{api_base}/pipeline/feature-extraction/{model}");
1716    let mut embeddings = Vec::with_capacity(inputs.len());
1717
1718    for input in inputs {
1719        let payload = crate::serde_json::json!({ "inputs": input }).to_string_compact();
1720        let (status, body_str) = http_post_json(&url, api_key, &[], payload, 90)
1721            .map_err(|e| crate::RedDBError::Query(format!("HuggingFace API error: {e}")))?;
1722        if !(200..300).contains(&status) {
1723            return Err(crate::RedDBError::Query(format!(
1724                "HuggingFace API error (status {status}): {body_str}"
1725            )));
1726        }
1727        let body: JsonValue = crate::serde_json::from_str(&body_str).map_err(|e| {
1728            crate::RedDBError::Query(format!("HuggingFace response parse error: {e}"))
1729        })?;
1730
1731        // HF returns [[f32, ...]] for single input
1732        let vector: Vec<f32> = match &body {
1733            JsonValue::Array(outer) => outer
1734                .iter()
1735                .filter_map(|v| v.as_f64().map(|n| n as f32))
1736                .collect(),
1737            _ => {
1738                return Err(crate::RedDBError::Query(
1739                    "unexpected HuggingFace embedding response format".to_string(),
1740                ))
1741            }
1742        };
1743        embeddings.push(vector);
1744    }
1745
1746    Ok(OpenAiEmbeddingResponse {
1747        provider: "huggingface",
1748        model: model.to_string(),
1749        embeddings,
1750        prompt_tokens: None,
1751        total_tokens: None,
1752    })
1753}
1754
1755/// Generate text via HuggingFace Inference API.
1756pub fn huggingface_prompt(
1757    api_key: &str,
1758    model: &str,
1759    prompt: &str,
1760    temperature: Option<f32>,
1761    max_tokens: Option<usize>,
1762    api_base: &str,
1763) -> crate::RedDBResult<AiPromptResponse> {
1764    let url = format!("{api_base}/models/{model}");
1765    let mut params = Map::new();
1766    if let Some(t) = temperature {
1767        params.insert("temperature".into(), JsonValue::Number(t as f64));
1768    }
1769    params.insert(
1770        "max_new_tokens".into(),
1771        JsonValue::Number(max_tokens.unwrap_or(512) as f64),
1772    );
1773    let payload = crate::serde_json::json!({
1774        "inputs": prompt,
1775        "parameters": JsonValue::Object(params)
1776    });
1777
1778    let (status, body_str) =
1779        http_post_json(&url, api_key, &[], payload.to_string_compact(), 120)
1780            .map_err(|e| crate::RedDBError::Query(format!("HuggingFace API error: {e}")))?;
1781    if !(200..300).contains(&status) {
1782        return Err(crate::RedDBError::Query(format!(
1783            "HuggingFace API error (status {status}): {body_str}"
1784        )));
1785    }
1786    let body: JsonValue = crate::serde_json::from_str(&body_str)
1787        .map_err(|e| crate::RedDBError::Query(format!("HuggingFace response parse error: {e}")))?;
1788
1789    let output_text = match &body {
1790        JsonValue::Array(arr) => arr
1791            .first()
1792            .and_then(|v| v.get("generated_text"))
1793            .and_then(JsonValue::as_str)
1794            .unwrap_or("")
1795            .to_string(),
1796        _ => body
1797            .get("generated_text")
1798            .and_then(JsonValue::as_str)
1799            .unwrap_or("")
1800            .to_string(),
1801    };
1802
1803    Ok(AiPromptResponse {
1804        provider: "huggingface",
1805        model: model.to_string(),
1806        output_text,
1807        output_chunks: None,
1808        prompt_tokens: None,
1809        completion_tokens: None,
1810        total_tokens: None,
1811        stop_reason: None,
1812    })
1813}
1814
1815// ============================================================================
1816// Local model stubs (requires 'local-models' feature flag)
1817// ============================================================================
1818
1819/// Local embedding via candle — requires `local-models` feature.
1820pub fn local_embeddings(
1821    _model_id: &str,
1822    _texts: &[String],
1823) -> crate::RedDBResult<OpenAiEmbeddingResponse> {
1824    Err(crate::RedDBError::FeatureNotEnabled(
1825        "local model inference requires the 'local-models' feature flag. \
1826         Build with: cargo build --features local-models. \
1827         Alternatively, use 'ollama' provider with a local Ollama server."
1828            .to_string(),
1829    ))
1830}
1831
1832/// Local prompt via candle — requires `local-models` feature.
1833pub fn local_prompt(_model_id: &str, _prompt: &str) -> crate::RedDBResult<AiPromptResponse> {
1834    Err(crate::RedDBError::FeatureNotEnabled(
1835        "local model inference requires the 'local-models' feature flag. \
1836         Build with: cargo build --features local-models. \
1837         Alternatively, use 'ollama' provider with a local Ollama server."
1838            .to_string(),
1839    ))
1840}
1841
1842// ============================================================================
1843// gRPC input collection — parity with HTTP /ai/embeddings
1844// ============================================================================
1845
1846/// Collect embedding inputs from any of the three supported shapes.
1847///
1848/// * `input: "..."` — single string.
1849/// * `inputs: ["...", ...]` — array of strings.
1850/// * `source_query: "SELECT ..."` — runs a SQL query and projects
1851///   either the named `source_field` from each row (source_mode =
1852///   "row", default) or every string cell of every result row
1853///   (source_mode = "result").
1854fn grpc_collect_embedding_inputs(
1855    runtime: &crate::runtime::RedDBRuntime,
1856    payload: &JsonValue,
1857) -> crate::RedDBResult<Vec<String>> {
1858    if let Some(source_query) = payload
1859        .get("source_query")
1860        .and_then(|v| v.as_str())
1861        .map(str::trim)
1862        .filter(|s| !s.is_empty())
1863    {
1864        return grpc_collect_inputs_from_source_query(runtime, payload, source_query);
1865    }
1866
1867    if let Some(arr) = payload.get("inputs").and_then(|v| v.as_array()) {
1868        let mut out = Vec::with_capacity(arr.len());
1869        for (idx, v) in arr.iter().enumerate() {
1870            let text = v.as_str().ok_or_else(|| {
1871                crate::RedDBError::Query(format!("field 'inputs[{idx}]' must be a string"))
1872            })?;
1873            if text.trim().is_empty() {
1874                return Err(crate::RedDBError::Query(format!(
1875                    "field 'inputs[{idx}]' cannot be empty"
1876                )));
1877            }
1878            out.push(text.to_string());
1879        }
1880        if out.is_empty() {
1881            return Err(crate::RedDBError::Query(
1882                "field 'inputs' must be a non-empty array of strings".to_string(),
1883            ));
1884        }
1885        return Ok(out);
1886    }
1887
1888    if let Some(single) = payload
1889        .get("input")
1890        .and_then(|v| v.as_str())
1891        .map(str::trim)
1892        .filter(|s| !s.is_empty())
1893    {
1894        return Ok(vec![single.to_string()]);
1895    }
1896
1897    Err(crate::RedDBError::Query(
1898        "provide either 'input', 'inputs', or 'source_query'".to_string(),
1899    ))
1900}
1901
1902fn grpc_collect_inputs_from_source_query(
1903    runtime: &crate::runtime::RedDBRuntime,
1904    payload: &JsonValue,
1905    source_query: &str,
1906) -> crate::RedDBResult<Vec<String>> {
1907    let result = runtime
1908        .execute_query(source_query)
1909        .map_err(|err| crate::RedDBError::Query(format!("source_query failed: {err}")))?;
1910
1911    let source_mode = payload
1912        .get("source_mode")
1913        .and_then(|v| v.as_str())
1914        .map(str::trim)
1915        .filter(|s| !s.is_empty())
1916        .unwrap_or("row")
1917        .to_ascii_lowercase();
1918
1919    let mut out: Vec<String> = Vec::new();
1920    match source_mode.as_str() {
1921        "row" => {
1922            let field = payload
1923                .get("source_field")
1924                .and_then(|v| v.as_str())
1925                .map(str::trim)
1926                .filter(|s| !s.is_empty())
1927                .ok_or_else(|| {
1928                    crate::RedDBError::Query(
1929                        "field 'source_field' is required when source_mode='row'".to_string(),
1930                    )
1931                })?;
1932            for rec in &result.result.records {
1933                for (key, value) in rec.iter_fields() {
1934                    if key.as_ref() == field {
1935                        if let crate::storage::schema::Value::Text(text) = value {
1936                            let trimmed = text.trim();
1937                            if !trimmed.is_empty() {
1938                                out.push(trimmed.to_string());
1939                            }
1940                        }
1941                    }
1942                }
1943            }
1944        }
1945        "result" => {
1946            for rec in &result.result.records {
1947                for (_, value) in rec.iter_fields() {
1948                    if let crate::storage::schema::Value::Text(text) = value {
1949                        let trimmed = text.trim();
1950                        if !trimmed.is_empty() {
1951                            out.push(trimmed.to_string());
1952                        }
1953                    }
1954                }
1955            }
1956        }
1957        other => {
1958            return Err(crate::RedDBError::Query(format!(
1959                "field 'source_mode' must be 'row' or 'result' (got '{other}')"
1960            )));
1961        }
1962    }
1963
1964    if out.is_empty() {
1965        return Err(crate::RedDBError::Query(
1966            "source_query produced zero non-empty text inputs".to_string(),
1967        ));
1968    }
1969    Ok(out)
1970}
1971
1972// ============================================================================
1973// gRPC stubs — delegate to the same logic as HTTP handlers
1974// ============================================================================
1975
/// gRPC embeddings — shared entrypoint that mirrors the HTTP handler.
///
/// Accepts the same JSON payload shape as `POST /ai/embeddings`:
///
/// ```json
/// { "provider": "openai", "model": "text-embedding-3-small",
///   "inputs": ["hello", "world"], "credential": "optional-alias" }
/// ```
///
/// Input shapes at parity with HTTP: `input` (single string),
/// `inputs` (array of strings), and `source_query` (SQL that the
/// runtime executes to materialise the input texts; `source_mode`
/// = `row` needs `source_field`, `result` uses every string cell
/// of the projected columns). Returns a JSON object with
/// `provider`, `model`, `embeddings`, `prompt_tokens`,
/// `total_tokens` (token fields only when the provider reports
/// them). Non-OpenAI-compatible providers are rejected with a
/// clear message, matching the HTTP handler.
pub fn grpc_embeddings(
    runtime: &crate::runtime::RedDBRuntime,
    payload: &JsonValue,
) -> crate::RedDBResult<JsonValue> {
    // Provider defaults to "openai" when absent/blank, same as HTTP.
    let provider_name = payload
        .get("provider")
        .and_then(|v| v.as_str())
        .map(str::trim)
        .filter(|s| !s.is_empty())
        .unwrap_or("openai");
    let provider = parse_provider(provider_name)?;
    // Routing matrix mirrors `handle_ai_embeddings`. See that function
    // for the rationale; in short: HuggingFace gets its own wire
    // shape, Anthropic fails fast (no embeddings product), and Local
    // requires a build-time feature flag.
    match &provider {
        AiProvider::Anthropic => {
            return Err(crate::RedDBError::Query(
                "Anthropic does not offer an embeddings API. \
                 Re-issue the request against an OpenAI-compatible \
                 provider (openai, groq, ollama, openrouter, together, \
                 venice, deepseek), HuggingFace, or a custom base URL — \
                 RedDB does not silently route embeddings to a \
                 different provider than the one you named."
                    .to_string(),
            ));
        }
        AiProvider::Local => {
            return Err(crate::RedDBError::Query(
                "Local embeddings require the `local-models` feature \
                 flag at engine build time."
                    .to_string(),
            ));
        }
        _ => {}
    }

    // Validate and gather the input texts before touching credentials,
    // so malformed payloads fail without a key lookup.
    let inputs: Vec<String> = grpc_collect_embedding_inputs(runtime, payload)?;

    // Model resolution order: explicit payload value, then the
    // provider-specific env var (REDDB_<PROVIDER>_EMBEDDING_MODEL),
    // then the generic REDDB_OPENAI_EMBEDDING_MODEL fallback, and
    // finally the provider's built-in default.
    let model = payload
        .get("model")
        .and_then(|v| v.as_str())
        .map(str::trim)
        .filter(|s| !s.is_empty())
        .map(str::to_string)
        .or_else(|| {
            std::env::var(format!(
                "REDDB_{}_EMBEDDING_MODEL",
                provider.token().to_ascii_uppercase()
            ))
            .ok()
        })
        .or_else(|| std::env::var("REDDB_OPENAI_EMBEDDING_MODEL").ok())
        .filter(|v| !v.trim().is_empty())
        .unwrap_or_else(|| provider.default_embedding_model().to_string());

    // Optional credential alias selects which stored key to use.
    let credential = payload
        .get("credential")
        .and_then(|v| v.as_str())
        .map(str::to_string);
    let api_key = resolve_api_key_from_runtime(&provider, credential.as_deref(), runtime)?;

    // `dimensions` must be a positive integer; anything else is ignored.
    let dimensions = payload
        .get("dimensions")
        .and_then(|v| v.as_i64())
        .and_then(|v| usize::try_from(v).ok())
        .filter(|v| *v > 0);

    let response = match &provider {
        AiProvider::HuggingFace => {
            huggingface_embeddings(&api_key, &model, &inputs, &provider.resolve_api_base())?
        }
        _ => {
            // OpenAI-compatible path: bridge the async client onto this
            // synchronous entrypoint via the shared AI executor.
            let transport = crate::runtime::ai::transport::AiTransport::from_runtime(runtime);
            let request = OpenAiEmbeddingRequest {
                api_key,
                model,
                inputs,
                dimensions,
                api_base: provider.resolve_api_base(),
            };
            crate::runtime::ai::block_on_ai(async move {
                openai_embeddings_async(&transport, request).await
            })
            .and_then(|result| result)?
        }
    };

    // Re-encode the f32 vectors as JSON number arrays for the wire.
    let embeddings_json: Vec<JsonValue> = response
        .embeddings
        .into_iter()
        .map(|vec| {
            JsonValue::Array(
                vec.into_iter()
                    .map(|f| JsonValue::Number(f as f64))
                    .collect(),
            )
        })
        .collect();

    let mut obj = Map::new();
    obj.insert(
        "provider".to_string(),
        JsonValue::String(response.provider.to_string()),
    );
    obj.insert("model".to_string(), JsonValue::String(response.model));
    obj.insert("embeddings".to_string(), JsonValue::Array(embeddings_json));
    // Token counts are optional — omitted keys mean "not reported".
    if let Some(pt) = response.prompt_tokens {
        obj.insert("prompt_tokens".to_string(), JsonValue::Number(pt as f64));
    }
    if let Some(tt) = response.total_tokens {
        obj.insert("total_tokens".to_string(), JsonValue::Number(tt as f64));
    }
    Ok(JsonValue::Object(obj))
}
2108
2109/// gRPC stub for AI prompt.
2110pub fn grpc_prompt(
2111    _runtime: &crate::runtime::RedDBRuntime,
2112    _payload: &JsonValue,
2113) -> crate::RedDBResult<JsonValue> {
2114    Err(crate::RedDBError::FeatureNotEnabled(
2115        "AI prompt via gRPC requires HTTP endpoint; use POST /ai/prompt".to_string(),
2116    ))
2117}
2118
2119/// gRPC stub for AI credentials.
2120pub fn grpc_credentials(
2121    _runtime: &crate::runtime::RedDBRuntime,
2122    _payload: &JsonValue,
2123) -> crate::RedDBResult<JsonValue> {
2124    Err(crate::RedDBError::FeatureNotEnabled(
2125        "AI credentials via gRPC requires HTTP endpoint; use POST /ai/credentials".to_string(),
2126    ))
2127}