Skip to main content

reddb_server/
ai.rs

1//! External AI provider integration primitives.
2//!
3//! This module currently supports OpenAI embeddings and is intended to be
4//! consumed by server handlers and future query/runtime integrations.
5
6use std::time::Duration;
7
8use crate::json::{parse_json, Map, Value as JsonValue};
9use crate::{RedDBError, RedDBResult};
10
11/// Shared HTTP helper for every outbound AI provider call. Centralises
12/// the ureq 3.x builder and error conversion. Returns
13/// `(status, body)` even on 4xx/5xx (via
14/// `http_status_as_error(false)`) so callers can format a
15/// provider-specific error without re-plumbing the 3.x error enum.
16fn http_post_json(
17    url: &str,
18    api_key: &str,
19    extra_headers: &[(&str, &str)],
20    payload: String,
21    read_timeout_secs: u64,
22) -> Result<(u16, String), String> {
23    let agent: ureq::Agent = ureq::Agent::config_builder()
24        .timeout_connect(Some(Duration::from_secs(10)))
25        .timeout_send_request(Some(Duration::from_secs(30)))
26        .timeout_recv_response(Some(Duration::from_secs(read_timeout_secs)))
27        .timeout_recv_body(Some(Duration::from_secs(read_timeout_secs)))
28        .http_status_as_error(false)
29        .build()
30        .into();
31
32    let mut req = agent
33        .post(url)
34        .header("Content-Type", "application/json")
35        .header("Accept", "application/json");
36    for (k, v) in extra_headers {
37        req = req.header(*k, *v);
38    }
39    let trimmed_key = api_key.trim();
40    if !trimmed_key.is_empty() {
41        req = req.header("Authorization", &format!("Bearer {}", trimmed_key));
42    }
43
44    match req.send(payload) {
45        Ok(mut resp) => {
46            let status = resp.status().as_u16();
47            let body = resp
48                .body_mut()
49                .read_to_string()
50                .map_err(|err| format!("failed to read response body: {err}"))?;
51            Ok((status, body))
52        }
53        Err(err) => Err(format!("{err}")),
54    }
55}
56
/// Default OpenAI embedding model; also used as the reported model name when
/// an embeddings response carries no `model` field.
pub const DEFAULT_OPENAI_EMBEDDING_MODEL: &str = "text-embedding-3-small";
/// Default base URL for the OpenAI API (endpoint paths are appended to it).
pub const DEFAULT_OPENAI_API_BASE: &str = "https://api.openai.com/v1";
/// Default OpenAI model for prompt (chat-completion) requests.
pub const DEFAULT_OPENAI_PROMPT_MODEL: &str = "gpt-4.1-mini";
/// Default Anthropic model for prompt (messages) requests.
pub const DEFAULT_ANTHROPIC_PROMPT_MODEL: &str = "claude-3-5-haiku-latest";
/// Default base URL for the Anthropic API (endpoint paths are appended to it).
pub const DEFAULT_ANTHROPIC_API_BASE: &str = "https://api.anthropic.com/v1";
/// Default value for the `anthropic-version` request header.
pub const DEFAULT_ANTHROPIC_VERSION: &str = "2023-06-01";
63
/// Input for an OpenAI-compatible embeddings call.
#[derive(Clone, Debug)]
pub struct OpenAiEmbeddingRequest {
    /// API key, sent as a `Bearer` token after trimming; a blank key means no
    /// authorization header is sent.
    pub api_key: String,
    /// Embedding model name; must not be blank.
    pub model: String,
    /// Texts to embed; at least one is required.
    pub inputs: Vec<String>,
    /// When set, forwarded as the `dimensions` field of the request payload.
    pub dimensions: Option<usize>,
    /// Base URL; trailing `/` characters are stripped before `/embeddings` is
    /// appended.
    pub api_base: String,
}
72
/// Parsed result of an OpenAI-compatible embeddings call.
#[derive(Clone, Debug)]
pub struct OpenAiEmbeddingResponse {
    /// Provider label attached by the parser.
    pub provider: &'static str,
    /// Model name reported by the response.
    pub model: String,
    /// One vector per input.
    pub embeddings: Vec<Vec<f32>>,
    /// `usage.prompt_tokens` from the response, when present.
    pub prompt_tokens: Option<u64>,
    /// `usage.total_tokens` from the response, when present.
    pub total_tokens: Option<u64>,
}
81
/// Input for an OpenAI chat-completion prompt.
#[derive(Clone, Debug)]
pub struct OpenAiPromptRequest {
    /// API key used for the `Bearer` authorization header.
    pub api_key: String,
    /// Chat model name; must not be blank.
    pub model: String,
    /// Prompt text, sent as a single `user` message; must not be blank.
    pub prompt: String,
    /// When set, forwarded as the `temperature` field of the payload.
    pub temperature: Option<f32>,
    /// When set, forwarded as the `max_tokens` field of the payload.
    pub max_output_tokens: Option<usize>,
    /// Base URL; trailing `/` characters are stripped before
    /// `/chat/completions` is appended.
    pub api_base: String,
}
91
/// Input for an Anthropic messages-API prompt.
#[derive(Clone, Debug)]
pub struct AnthropicPromptRequest {
    /// API key, sent in the `x-api-key` header; must not be blank.
    pub api_key: String,
    /// Model name; must not be blank.
    pub model: String,
    /// Prompt text, sent as a single `user` message; must not be blank.
    pub prompt: String,
    /// When set, forwarded as the `temperature` field of the payload.
    pub temperature: Option<f32>,
    /// Forwarded as `max_tokens`; the payload builder uses 512 when unset.
    pub max_output_tokens: Option<usize>,
    /// Base URL; trailing `/` characters are stripped before `/messages` is
    /// appended.
    pub api_base: String,
    /// Value of the `anthropic-version` header.
    pub anthropic_version: String,
}
102
/// Provider-independent result of a prompt call.
#[derive(Clone, Debug)]
pub struct AiPromptResponse {
    /// Provider label attached by the parser (`"openai"` or `"anthropic"`).
    pub provider: &'static str,
    /// Model name from the response, or the requested model when the response
    /// does not name one.
    pub model: String,
    /// Trimmed response text; multiple text parts are joined with a blank line.
    pub output_text: String,
    /// Input token count (`usage.prompt_tokens` for OpenAI,
    /// `usage.input_tokens` for Anthropic), when reported.
    pub prompt_tokens: Option<u64>,
    /// Output token count (`usage.completion_tokens` for OpenAI,
    /// `usage.output_tokens` for Anthropic), when reported.
    pub completion_tokens: Option<u64>,
    /// Total token count: reported by the provider or derived as
    /// prompt + completion when both are known.
    pub total_tokens: Option<u64>,
    /// Why generation stopped (`finish_reason` for OpenAI, `stop_reason` for
    /// Anthropic), when reported.
    pub stop_reason: Option<String>,
}
113
114#[deprecated(
115    since = "1.0.0",
116    note = "use AiBatchClient::embed_batch for embeddings or openai_embeddings_async with AiTransport when token usage metadata is required"
117)]
118pub fn openai_embeddings(request: OpenAiEmbeddingRequest) -> RedDBResult<OpenAiEmbeddingResponse> {
119    if request.model.trim().is_empty() {
120        return Err(RedDBError::Query(
121            "OpenAI embedding model cannot be empty".to_string(),
122        ));
123    }
124    if request.inputs.is_empty() {
125        return Err(RedDBError::Query(
126            "at least one input is required for embeddings".to_string(),
127        ));
128    }
129
130    let url = format!("{}/embeddings", request.api_base.trim_end_matches('/'));
131    let payload =
132        build_openai_embedding_payload(&request.model, &request.inputs, request.dimensions);
133
134    let (status, body) = http_post_json(&url, &request.api_key, &[], payload, 90)
135        .map_err(|err| RedDBError::Query(format!("OpenAI transport error: {err}")))?;
136
137    if !(200..300).contains(&status) {
138        let message = openai_error_message(&body)
139            .unwrap_or_else(|| "OpenAI embeddings request failed".to_string());
140        return Err(RedDBError::Query(format!(
141            "OpenAI embeddings request failed (status {status}): {message}"
142        )));
143    }
144
145    parse_openai_embedding_response(&body)
146}
147
148#[deprecated(since = "1.0.0", note = "use openai_prompt_async with AiTransport")]
149pub fn openai_prompt(request: OpenAiPromptRequest) -> RedDBResult<AiPromptResponse> {
150    if request.model.trim().is_empty() {
151        return Err(RedDBError::Query(
152            "OpenAI prompt model cannot be empty".to_string(),
153        ));
154    }
155    if request.prompt.trim().is_empty() {
156        return Err(RedDBError::Query("prompt cannot be empty".to_string()));
157    }
158
159    let url = format!(
160        "{}/chat/completions",
161        request.api_base.trim_end_matches('/')
162    );
163    let payload = build_openai_prompt_payload(
164        &request.model,
165        &request.prompt,
166        request.temperature,
167        request.max_output_tokens,
168    );
169
170    let (status, body) = http_post_json(&url, &request.api_key, &[], payload, 120)
171        .map_err(|err| RedDBError::Query(format!("OpenAI transport error: {err}")))?;
172
173    if !(200..300).contains(&status) {
174        let message = openai_error_message(&body)
175            .unwrap_or_else(|| "OpenAI prompt request failed".to_string());
176        return Err(RedDBError::Query(format!(
177            "OpenAI prompt request failed (status {status}): {message}"
178        )));
179    }
180
181    parse_openai_prompt_response(&body, &request.model)
182}
183
184#[deprecated(since = "1.0.0", note = "use anthropic_prompt_async with AiTransport")]
185pub fn anthropic_prompt(request: AnthropicPromptRequest) -> RedDBResult<AiPromptResponse> {
186    if request.api_key.trim().is_empty() {
187        return Err(RedDBError::Query(
188            "Anthropic API key cannot be empty".to_string(),
189        ));
190    }
191    if request.model.trim().is_empty() {
192        return Err(RedDBError::Query(
193            "Anthropic model cannot be empty".to_string(),
194        ));
195    }
196    if request.prompt.trim().is_empty() {
197        return Err(RedDBError::Query("prompt cannot be empty".to_string()));
198    }
199
200    let url = format!("{}/messages", request.api_base.trim_end_matches('/'));
201    let payload = build_anthropic_prompt_payload(
202        &request.model,
203        &request.prompt,
204        request.temperature,
205        request.max_output_tokens,
206    );
207
208    // Anthropic uses its own `x-api-key` header instead of
209    // `Authorization: Bearer`, so skip the shared helper's default
210    // auth header path — we pass an empty API key and set
211    // `x-api-key` via extra_headers instead.
212    let extra = [
213        ("x-api-key", request.api_key.as_str()),
214        ("anthropic-version", request.anthropic_version.as_str()),
215    ];
216    let (status, body) = http_post_json(&url, "", &extra, payload, 120)
217        .map_err(|err| RedDBError::Query(format!("Anthropic transport error: {err}")))?;
218
219    if !(200..300).contains(&status) {
220        let message = anthropic_error_message(&body)
221            .unwrap_or_else(|| "Anthropic prompt request failed".to_string());
222        return Err(RedDBError::Query(format!(
223            "Anthropic prompt request failed (status {status}): {message}"
224        )));
225    }
226
227    parse_anthropic_prompt_response(&body, &request.model)
228}
229
230/// Async OpenAI-compatible embeddings via [`AiTransport`].
231///
232/// Uses the transport's connection pool and retry policy (429/5xx backoff)
233/// instead of the deprecated one-shot blocking path.
234pub async fn openai_embeddings_async(
235    transport: &crate::runtime::ai::transport::AiTransport,
236    request: OpenAiEmbeddingRequest,
237) -> RedDBResult<OpenAiEmbeddingResponse> {
238    if request.model.trim().is_empty() {
239        return Err(RedDBError::Query(
240            "OpenAI embedding model cannot be empty".to_string(),
241        ));
242    }
243    if request.inputs.is_empty() {
244        return Err(RedDBError::Query(
245            "at least one input is required for embeddings".to_string(),
246        ));
247    }
248
249    let url = format!("{}/embeddings", request.api_base.trim_end_matches('/'));
250    let payload =
251        build_openai_embedding_payload(&request.model, &request.inputs, request.dimensions);
252    let mut http_req =
253        crate::runtime::ai::transport::AiHttpRequest::post_json("openai-compatible", url, payload);
254    let trimmed_key = request.api_key.trim();
255    if !trimmed_key.is_empty() {
256        http_req = http_req.header("authorization", format!("Bearer {}", trimmed_key));
257    }
258
259    let response = transport
260        .request(http_req)
261        .await
262        .map_err(|e| RedDBError::Query(e.to_string()))?;
263
264    parse_openai_embedding_response(&response.body)
265}
266
267/// Async OpenAI chat-completion prompt via [`AiTransport`].
268///
269/// Uses the transport's connection pool and retry policy (429/5xx backoff)
270/// instead of the deprecated one-shot blocking path.
271pub async fn openai_prompt_async(
272    transport: &crate::runtime::ai::transport::AiTransport,
273    request: OpenAiPromptRequest,
274) -> RedDBResult<AiPromptResponse> {
275    if request.model.trim().is_empty() {
276        return Err(RedDBError::Query(
277            "OpenAI prompt model cannot be empty".to_string(),
278        ));
279    }
280    if request.prompt.trim().is_empty() {
281        return Err(RedDBError::Query("prompt cannot be empty".to_string()));
282    }
283
284    let url = format!(
285        "{}/chat/completions",
286        request.api_base.trim_end_matches('/')
287    );
288    let payload = build_openai_prompt_payload(
289        &request.model,
290        &request.prompt,
291        request.temperature,
292        request.max_output_tokens,
293    );
294    let http_req = crate::runtime::ai::transport::AiHttpRequest::post_json("openai", url, payload)
295        .model(request.model.clone())
296        .header("authorization", format!("Bearer {}", request.api_key));
297
298    let response = transport
299        .request(http_req)
300        .await
301        .map_err(|e| RedDBError::Query(e.to_string()))?;
302
303    parse_openai_prompt_response(&response.body, &request.model)
304}
305
306/// Async Anthropic messages-API prompt via [`AiTransport`].
307///
308/// Uses the transport's connection pool and retry policy (429/5xx backoff)
309/// instead of the deprecated one-shot blocking path.
310pub async fn anthropic_prompt_async(
311    transport: &crate::runtime::ai::transport::AiTransport,
312    request: AnthropicPromptRequest,
313) -> RedDBResult<AiPromptResponse> {
314    if request.api_key.trim().is_empty() {
315        return Err(RedDBError::Query(
316            "Anthropic API key cannot be empty".to_string(),
317        ));
318    }
319    if request.model.trim().is_empty() {
320        return Err(RedDBError::Query(
321            "Anthropic model cannot be empty".to_string(),
322        ));
323    }
324    if request.prompt.trim().is_empty() {
325        return Err(RedDBError::Query("prompt cannot be empty".to_string()));
326    }
327
328    let url = format!("{}/messages", request.api_base.trim_end_matches('/'));
329    let payload = build_anthropic_prompt_payload(
330        &request.model,
331        &request.prompt,
332        request.temperature,
333        request.max_output_tokens,
334    );
335    let http_req =
336        crate::runtime::ai::transport::AiHttpRequest::post_json("anthropic", url, payload)
337            .model(request.model.clone())
338            .header("x-api-key", request.api_key)
339            .header("anthropic-version", request.anthropic_version);
340
341    let response = transport
342        .request(http_req)
343        .await
344        .map_err(|e| RedDBError::Query(e.to_string()))?;
345
346    parse_anthropic_prompt_response(&response.body, &request.model)
347}
348
349/// Build an OpenAI-compatible embedding request payload.
350pub(crate) fn build_embedding_payload(model: &str, inputs: &[String]) -> String {
351    build_openai_embedding_payload(model, inputs, None)
352}
353
354/// Parse an OpenAI-compatible embedding response, returning only the vectors.
355pub(crate) fn parse_embedding_vectors(body: &str) -> Result<Vec<Vec<f32>>, String> {
356    parse_openai_embedding_response(body)
357        .map(|r| r.embeddings)
358        .map_err(|e| e.to_string())
359}
360
361pub(crate) fn parse_embedding_response(body: &str) -> Result<OpenAiEmbeddingResponse, String> {
362    parse_openai_embedding_response(body).map_err(|e| e.to_string())
363}
364
365fn build_openai_embedding_payload(
366    model: &str,
367    inputs: &[String],
368    dimensions: Option<usize>,
369) -> String {
370    let mut object = Map::new();
371    object.insert("model".to_string(), JsonValue::String(model.to_string()));
372    if inputs.len() == 1 {
373        object.insert("input".to_string(), JsonValue::String(inputs[0].clone()));
374    } else {
375        object.insert(
376            "input".to_string(),
377            JsonValue::Array(inputs.iter().cloned().map(JsonValue::String).collect()),
378        );
379    }
380    if let Some(dimensions) = dimensions {
381        object.insert(
382            "dimensions".to_string(),
383            JsonValue::Number(dimensions as f64),
384        );
385    }
386    object.insert(
387        "encoding_format".to_string(),
388        JsonValue::String("float".to_string()),
389    );
390    JsonValue::Object(object).to_string_compact()
391}
392
393fn openai_error_message(body: &str) -> Option<String> {
394    provider_error_message(body)
395}
396
397fn anthropic_error_message(body: &str) -> Option<String> {
398    provider_error_message(body)
399}
400
401fn provider_error_message(body: &str) -> Option<String> {
402    let parsed = parse_json(body).ok().map(JsonValue::from)?;
403    let error = parsed.get("error")?;
404    if let Some(message) = error.get("message").and_then(JsonValue::as_str) {
405        let trimmed = message.trim();
406        if !trimmed.is_empty() {
407            return Some(trimmed.to_string());
408        }
409    }
410    None
411}
412
413fn build_openai_prompt_payload(
414    model: &str,
415    prompt: &str,
416    temperature: Option<f32>,
417    max_output_tokens: Option<usize>,
418) -> String {
419    let mut object = Map::new();
420    object.insert("model".to_string(), JsonValue::String(model.to_string()));
421
422    let mut message = Map::new();
423    message.insert("role".to_string(), JsonValue::String("user".to_string()));
424    message.insert("content".to_string(), JsonValue::String(prompt.to_string()));
425    object.insert(
426        "messages".to_string(),
427        JsonValue::Array(vec![JsonValue::Object(message)]),
428    );
429
430    if let Some(temperature) = temperature {
431        object.insert(
432            "temperature".to_string(),
433            JsonValue::Number(temperature as f64),
434        );
435    }
436
437    if let Some(max_output_tokens) = max_output_tokens {
438        object.insert(
439            "max_tokens".to_string(),
440            JsonValue::Number(max_output_tokens as f64),
441        );
442    }
443
444    JsonValue::Object(object).to_string_compact()
445}
446
447fn build_anthropic_prompt_payload(
448    model: &str,
449    prompt: &str,
450    temperature: Option<f32>,
451    max_output_tokens: Option<usize>,
452) -> String {
453    let mut object = Map::new();
454    object.insert("model".to_string(), JsonValue::String(model.to_string()));
455    object.insert(
456        "max_tokens".to_string(),
457        JsonValue::Number(max_output_tokens.unwrap_or(512) as f64),
458    );
459
460    let mut message = Map::new();
461    message.insert("role".to_string(), JsonValue::String("user".to_string()));
462    message.insert("content".to_string(), JsonValue::String(prompt.to_string()));
463    object.insert(
464        "messages".to_string(),
465        JsonValue::Array(vec![JsonValue::Object(message)]),
466    );
467
468    if let Some(temperature) = temperature {
469        object.insert(
470            "temperature".to_string(),
471            JsonValue::Number(temperature as f64),
472        );
473    }
474
475    JsonValue::Object(object).to_string_compact()
476}
477
478fn extract_text_from_parts(parts: &[JsonValue]) -> Option<String> {
479    let mut chunks = Vec::new();
480    for part in parts {
481        if let Some(text) = part.as_str() {
482            let trimmed = text.trim();
483            if !trimmed.is_empty() {
484                chunks.push(trimmed.to_string());
485            }
486            continue;
487        }
488
489        let Some(object) = part.as_object() else {
490            continue;
491        };
492        let Some(text) = object.get("text").and_then(JsonValue::as_str) else {
493            continue;
494        };
495        let trimmed = text.trim();
496        if !trimmed.is_empty() {
497            chunks.push(trimmed.to_string());
498        }
499    }
500
501    if chunks.is_empty() {
502        None
503    } else {
504        Some(chunks.join("\n\n"))
505    }
506}
507
508fn parse_openai_prompt_response(
509    body: &str,
510    requested_model: &str,
511) -> RedDBResult<AiPromptResponse> {
512    let parsed = parse_json(body)
513        .map_err(|err| RedDBError::Query(format!("invalid OpenAI prompt JSON response: {err}")))?;
514    let json = JsonValue::from(parsed);
515
516    let model = json
517        .get("model")
518        .and_then(JsonValue::as_str)
519        .unwrap_or(requested_model)
520        .to_string();
521
522    let Some(choices) = json.get("choices").and_then(JsonValue::as_array) else {
523        return Err(RedDBError::Query(
524            "OpenAI prompt response missing 'choices' array".to_string(),
525        ));
526    };
527    let Some(first_choice) = choices.first() else {
528        return Err(RedDBError::Query(
529            "OpenAI prompt response contains no choices".to_string(),
530        ));
531    };
532
533    let output_text = first_choice
534        .get("message")
535        .and_then(|message| {
536            if let Some(text) = message.get("content").and_then(JsonValue::as_str) {
537                let trimmed = text.trim();
538                if !trimmed.is_empty() {
539                    return Some(trimmed.to_string());
540                }
541            }
542            message
543                .get("content")
544                .and_then(JsonValue::as_array)
545                .and_then(extract_text_from_parts)
546        })
547        .ok_or_else(|| {
548            RedDBError::Query("OpenAI prompt response missing text content".to_string())
549        })?;
550
551    let prompt_tokens = json
552        .get("usage")
553        .and_then(|usage| usage.get("prompt_tokens"))
554        .and_then(JsonValue::as_i64)
555        .and_then(|value| u64::try_from(value).ok());
556    let completion_tokens = json
557        .get("usage")
558        .and_then(|usage| usage.get("completion_tokens"))
559        .and_then(JsonValue::as_i64)
560        .and_then(|value| u64::try_from(value).ok());
561    let total_tokens = json
562        .get("usage")
563        .and_then(|usage| usage.get("total_tokens"))
564        .and_then(JsonValue::as_i64)
565        .and_then(|value| u64::try_from(value).ok())
566        .or_else(|| match (prompt_tokens, completion_tokens) {
567            (Some(prompt), Some(completion)) => Some(prompt.saturating_add(completion)),
568            _ => None,
569        });
570
571    let stop_reason = first_choice
572        .get("finish_reason")
573        .and_then(JsonValue::as_str)
574        .map(str::to_string);
575
576    Ok(AiPromptResponse {
577        provider: "openai",
578        model,
579        output_text,
580        prompt_tokens,
581        completion_tokens,
582        total_tokens,
583        stop_reason,
584    })
585}
586
587fn parse_anthropic_prompt_response(
588    body: &str,
589    requested_model: &str,
590) -> RedDBResult<AiPromptResponse> {
591    let parsed = parse_json(body).map_err(|err| {
592        RedDBError::Query(format!("invalid Anthropic prompt JSON response: {err}"))
593    })?;
594    let json = JsonValue::from(parsed);
595
596    let model = json
597        .get("model")
598        .and_then(JsonValue::as_str)
599        .unwrap_or(requested_model)
600        .to_string();
601
602    let Some(content_parts) = json.get("content").and_then(JsonValue::as_array) else {
603        return Err(RedDBError::Query(
604            "Anthropic prompt response missing 'content' array".to_string(),
605        ));
606    };
607
608    let output_text = extract_text_from_parts(content_parts).ok_or_else(|| {
609        RedDBError::Query("Anthropic prompt response missing text content".to_string())
610    })?;
611
612    let prompt_tokens = json
613        .get("usage")
614        .and_then(|usage| usage.get("input_tokens"))
615        .and_then(JsonValue::as_i64)
616        .and_then(|value| u64::try_from(value).ok());
617    let completion_tokens = json
618        .get("usage")
619        .and_then(|usage| usage.get("output_tokens"))
620        .and_then(JsonValue::as_i64)
621        .and_then(|value| u64::try_from(value).ok());
622    let total_tokens = match (prompt_tokens, completion_tokens) {
623        (Some(prompt), Some(completion)) => Some(prompt.saturating_add(completion)),
624        _ => None,
625    };
626
627    let stop_reason = json
628        .get("stop_reason")
629        .and_then(JsonValue::as_str)
630        .map(str::to_string);
631
632    Ok(AiPromptResponse {
633        provider: "anthropic",
634        model,
635        output_text,
636        prompt_tokens,
637        completion_tokens,
638        total_tokens,
639        stop_reason,
640    })
641}
642
643fn parse_openai_embedding_response(body: &str) -> RedDBResult<OpenAiEmbeddingResponse> {
644    let parsed = parse_json(body).map_err(|err| {
645        RedDBError::Query(format!("invalid OpenAI embeddings JSON response: {err}"))
646    })?;
647    let json = JsonValue::from(parsed);
648
649    let model = json
650        .get("model")
651        .and_then(JsonValue::as_str)
652        .unwrap_or(DEFAULT_OPENAI_EMBEDDING_MODEL)
653        .to_string();
654
655    let Some(data) = json.get("data").and_then(JsonValue::as_array) else {
656        return Err(RedDBError::Query(
657            "OpenAI response missing 'data' array".to_string(),
658        ));
659    };
660
661    let mut rows: Vec<(usize, Vec<f32>)> = Vec::with_capacity(data.len());
662    for (position, item) in data.iter().enumerate() {
663        let index = item
664            .get("index")
665            .and_then(JsonValue::as_i64)
666            .and_then(|value| usize::try_from(value).ok())
667            .unwrap_or(position);
668
669        let Some(embedding_values) = item.get("embedding").and_then(JsonValue::as_array) else {
670            return Err(RedDBError::Query(
671                "OpenAI response contains item without 'embedding' array".to_string(),
672            ));
673        };
674        if embedding_values.is_empty() {
675            return Err(RedDBError::Query(
676                "OpenAI response contains empty embedding vector".to_string(),
677            ));
678        }
679
680        let mut embedding = Vec::with_capacity(embedding_values.len());
681        for value in embedding_values {
682            let Some(number) = value.as_f64() else {
683                return Err(RedDBError::Query(
684                    "OpenAI response contains non-numeric embedding value".to_string(),
685                ));
686            };
687            embedding.push(number as f32);
688        }
689        rows.push((index, embedding));
690    }
691    rows.sort_by_key(|(index, _)| *index);
692    let embeddings = rows.into_iter().map(|(_, embedding)| embedding).collect();
693
694    let prompt_tokens = json
695        .get("usage")
696        .and_then(|usage| usage.get("prompt_tokens"))
697        .and_then(JsonValue::as_i64)
698        .and_then(|value| u64::try_from(value).ok());
699    let total_tokens = json
700        .get("usage")
701        .and_then(|usage| usage.get("total_tokens"))
702        .and_then(JsonValue::as_i64)
703        .and_then(|value| u64::try_from(value).ok());
704
705    Ok(OpenAiEmbeddingResponse {
706        provider: "openai",
707        model,
708        embeddings,
709        prompt_tokens,
710        total_tokens,
711    })
712}
713
#[cfg(test)]
mod tests {
    use super::*;

    /// Transport with default settings. The validation tests below fail
    /// before any request is handed to it.
    fn default_transport() -> crate::runtime::ai::transport::AiTransport {
        crate::runtime::ai::transport::AiTransport::new(Default::default())
    }

    /// OpenAI prompt request with fixed credentials and the given model/prompt.
    fn openai_prompt_request(model: &str, prompt: &str) -> OpenAiPromptRequest {
        OpenAiPromptRequest {
            api_key: "key".to_string(),
            model: model.to_string(),
            prompt: prompt.to_string(),
            temperature: None,
            max_output_tokens: None,
            api_base: "https://api.openai.com/v1".to_string(),
        }
    }

    // Items arrive out of order and must come back sorted by `index`.
    #[test]
    fn parse_openai_embedding_response_extracts_vectors() {
        let body = r#"{
          "object":"list",
          "data":[
            {"object":"embedding","index":1,"embedding":[0.3,0.4]},
            {"object":"embedding","index":0,"embedding":[0.1,0.2]}
          ],
          "model":"text-embedding-3-small",
          "usage":{"prompt_tokens":12,"total_tokens":12}
        }"#;

        let parsed = parse_openai_embedding_response(body).expect("response should parse");

        assert_eq!(parsed.provider, "openai");
        assert_eq!(parsed.model, "text-embedding-3-small");
        assert_eq!(parsed.embeddings.len(), 2);
        assert_eq!(parsed.embeddings[0], vec![0.1, 0.2]);
        assert_eq!(parsed.embeddings[1], vec![0.3, 0.4]);
        assert_eq!(parsed.prompt_tokens, Some(12));
        assert_eq!(parsed.total_tokens, Some(12));
    }

    #[test]
    fn openai_error_message_extracts_nested_message() {
        let body = r#"{"error":{"message":"bad api key","type":"invalid_request_error"}}"#;

        let message = openai_error_message(body);

        assert_eq!(message.as_deref(), Some("bad api key"));
    }

    #[test]
    fn parse_openai_prompt_response_extracts_text_and_usage() {
        let body = r#"{
          "id":"chatcmpl_1",
          "object":"chat.completion",
          "model":"gpt-4.1-mini",
          "choices":[
            {
              "index":0,
              "finish_reason":"stop",
              "message":{"role":"assistant","content":"Resumo pronto."}
            }
          ],
          "usage":{"prompt_tokens":10,"completion_tokens":4,"total_tokens":14}
        }"#;

        let response =
            parse_openai_prompt_response(body, DEFAULT_OPENAI_PROMPT_MODEL).expect("parse");

        assert_eq!(response.provider, "openai");
        assert_eq!(response.model, "gpt-4.1-mini");
        assert_eq!(response.output_text, "Resumo pronto.");
        assert_eq!(response.prompt_tokens, Some(10));
        assert_eq!(response.completion_tokens, Some(4));
        assert_eq!(response.total_tokens, Some(14));
        assert_eq!(response.stop_reason.as_deref(), Some("stop"));
    }

    // Anthropic reports no total; it is derived from input + output tokens.
    #[test]
    fn parse_anthropic_prompt_response_extracts_text_and_usage() {
        let body = r#"{
          "id":"msg_1",
          "model":"claude-3-5-haiku-latest",
          "type":"message",
          "content":[{"type":"text","text":"Action complete."}],
          "usage":{"input_tokens":11,"output_tokens":5},
          "stop_reason":"end_turn"
        }"#;

        let response =
            parse_anthropic_prompt_response(body, DEFAULT_ANTHROPIC_PROMPT_MODEL).expect("parse");

        assert_eq!(response.provider, "anthropic");
        assert_eq!(response.model, "claude-3-5-haiku-latest");
        assert_eq!(response.output_text, "Action complete.");
        assert_eq!(response.prompt_tokens, Some(11));
        assert_eq!(response.completion_tokens, Some(5));
        assert_eq!(response.total_tokens, Some(16));
        assert_eq!(response.stop_reason.as_deref(), Some("end_turn"));
    }

    #[test]
    fn resolve_api_key_prefers_vault_secret_over_legacy_config() {
        let provider = AiProvider::OpenAi;
        let alias = "vault_unit_alias";
        let vault_path = ai_api_secret_path(&provider, alias);
        let legacy_path = ai_api_legacy_config_key(&provider, alias);

        // Both stores answer; the vault value is expected to win.
        let resolved = resolve_api_key(&provider, Some(alias), |key| {
            let value = if key == vault_path {
                Some("vault-key")
            } else if key == legacy_path {
                Some("legacy-key")
            } else {
                None
            };
            Ok(value.map(str::to_string))
        })
        .expect("resolve");

        assert_eq!(resolved, "vault-key");
    }

    #[test]
    fn resolve_api_key_uses_default_vault_secret_path() {
        let provider = AiProvider::OpenAi;
        let default_path = ai_api_secret_path(&provider, "default");

        // No alias is supplied, so only the "default" secret path answers.
        let resolved = resolve_api_key(&provider, None, |key| {
            let value = if key == default_path {
                Some("default-vault-key")
            } else {
                None
            };
            Ok(value.map(str::to_string))
        })
        .expect("resolve");

        assert_eq!(resolved, "default-vault-key");
    }

    #[tokio::test]
    async fn openai_prompt_async_rejects_empty_model() {
        let transport = default_transport();
        let request = openai_prompt_request("  ", "hello");

        let err = openai_prompt_async(&transport, request).await.unwrap_err();

        assert!(err.to_string().contains("model cannot be empty"));
    }

    #[tokio::test]
    async fn openai_prompt_async_rejects_empty_prompt() {
        let transport = default_transport();
        let request = openai_prompt_request("gpt-4.1-mini", "");

        let err = openai_prompt_async(&transport, request).await.unwrap_err();

        assert!(err.to_string().contains("prompt cannot be empty"));
    }

    #[tokio::test]
    async fn anthropic_prompt_async_rejects_empty_api_key() {
        let transport = default_transport();
        let request = AnthropicPromptRequest {
            api_key: "  ".to_string(),
            model: "claude-3-5-haiku-latest".to_string(),
            prompt: "hello".to_string(),
            temperature: None,
            max_output_tokens: None,
            api_base: "https://api.anthropic.com/v1".to_string(),
            anthropic_version: DEFAULT_ANTHROPIC_VERSION.to_string(),
        };

        let err = anthropic_prompt_async(&transport, request)
            .await
            .unwrap_err();

        assert!(err.to_string().contains("API key cannot be empty"));
    }
}
881
882// ============================================================================
883// Provider & Credential Resolution (shared between HTTP, gRPC, and runtime)
884// ============================================================================
885
/// AI provider identifier.
///
/// Each variant maps to a string token (see `AiProvider::token`) that is
/// used to build environment-variable names and config/secret keys.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AiProvider {
    /// OpenAI (token `openai`).
    OpenAi,
    /// Anthropic (token `anthropic`).
    Anthropic,
    /// Groq (token `groq`).
    Groq,
    /// OpenRouter (token `openrouter`).
    OpenRouter,
    /// Together (token `together`).
    Together,
    /// Venice (token `venice`).
    Venice,
    /// Ollama (token `ollama`); `requires_api_key` reports `false` for it.
    Ollama,
    /// DeepSeek (token `deepseek`).
    DeepSeek,
    /// HuggingFace (token `huggingface`).
    HuggingFace,
    /// Local inference (token `local`); `requires_api_key` reports `false` for it.
    Local,
    /// Custom provider. `parse_provider` creates this variant from an
    /// `http://` / `https://` string, and the wrapped value is returned by
    /// both `token` and `default_api_base`.
    Custom(String),
}
901
902impl AiProvider {
903    pub fn token(&self) -> &str {
904        match self {
905            Self::OpenAi => "openai",
906            Self::Anthropic => "anthropic",
907            Self::Groq => "groq",
908            Self::OpenRouter => "openrouter",
909            Self::Together => "together",
910            Self::Venice => "venice",
911            Self::Ollama => "ollama",
912            Self::DeepSeek => "deepseek",
913            Self::HuggingFace => "huggingface",
914            Self::Local => "local",
915            Self::Custom(name) => name.as_str(),
916        }
917    }
918
919    pub fn default_prompt_model(&self) -> &str {
920        match self {
921            Self::OpenAi => DEFAULT_OPENAI_PROMPT_MODEL,
922            Self::Anthropic => DEFAULT_ANTHROPIC_PROMPT_MODEL,
923            Self::Groq => "llama-3.3-70b-versatile",
924            Self::OpenRouter => "auto",
925            Self::Together => "meta-llama/Meta-Llama-3-8B-Instruct",
926            Self::Venice => "llama-3.3-70b",
927            Self::Ollama => "llama3",
928            Self::DeepSeek => "deepseek-chat",
929            Self::HuggingFace => "mistralai/Mistral-7B-Instruct-v0.3",
930            Self::Local => "sentence-transformers/all-MiniLM-L6-v2",
931            Self::Custom(_) => DEFAULT_OPENAI_PROMPT_MODEL,
932        }
933    }
934
935    pub fn prompt_model_env_name(&self) -> String {
936        format!("REDDB_{}_PROMPT_MODEL", self.token().to_ascii_uppercase())
937    }
938
939    pub fn default_embedding_model(&self) -> &str {
940        match self {
941            Self::Ollama => "nomic-embed-text",
942            Self::HuggingFace | Self::Local => "sentence-transformers/all-MiniLM-L6-v2",
943            _ => DEFAULT_OPENAI_EMBEDDING_MODEL,
944        }
945    }
946
947    pub fn default_api_base(&self) -> &str {
948        match self {
949            Self::OpenAi => DEFAULT_OPENAI_API_BASE,
950            Self::Anthropic => DEFAULT_ANTHROPIC_API_BASE,
951            Self::Groq => "https://api.groq.com/openai/v1",
952            Self::OpenRouter => "https://openrouter.ai/api/v1",
953            Self::Together => "https://api.together.xyz/v1",
954            Self::Venice => "https://api.venice.ai/api/v1",
955            Self::Ollama => "http://localhost:11434/v1",
956            Self::DeepSeek => "https://api.deepseek.com/v1",
957            Self::HuggingFace => "https://api-inference.huggingface.co",
958            Self::Local => "local",
959            Self::Custom(base) => base.as_str(),
960        }
961    }
962
963    pub fn api_base_env_name(&self) -> String {
964        format!("REDDB_{}_API_BASE", self.token().to_ascii_uppercase())
965    }
966
967    pub fn default_key_env_name(&self) -> String {
968        format!("REDDB_{}_API_KEY", self.token().to_ascii_uppercase())
969    }
970
971    pub fn alias_key_env_name(&self, alias: &str) -> String {
972        let normalized = normalize_alias_token(alias);
973        format!(
974            "REDDB_{}_API_KEY_{normalized}",
975            self.token().to_ascii_uppercase()
976        )
977    }
978
979    pub fn resolve_api_base(&self) -> String {
980        if let Ok(value) = std::env::var(self.api_base_env_name()) {
981            let value = value.trim().to_string();
982            if !value.is_empty() {
983                return value;
984            }
985        }
986        self.default_api_base().to_string()
987    }
988
989    /// Resolve API base URL checking KV store too (for custom base_url config).
990    pub fn resolve_api_base_with_kv<F>(&self, alias: &str, kv_getter: &F) -> String
991    where
992        F: Fn(&str) -> crate::RedDBResult<Option<String>>,
993    {
994        // 1. Env var
995        if let Ok(value) = std::env::var(self.api_base_env_name()) {
996            let value = value.trim().to_string();
997            if !value.is_empty() {
998                return value;
999            }
1000        }
1001        // 2. KV store: {provider}/{alias}/base_url
1002        let kv_key = format!("red.config.ai.{}.{alias}.base_url", self.token());
1003        if let Ok(Some(value)) = kv_getter(&kv_key) {
1004            let value = value.trim().to_string();
1005            if !value.is_empty() {
1006                return value;
1007            }
1008        }
1009        self.default_api_base().to_string()
1010    }
1011
1012    /// Whether this provider uses the OpenAI-compatible API format.
1013    pub fn is_openai_compatible(&self) -> bool {
1014        matches!(
1015            self,
1016            Self::OpenAi
1017                | Self::Groq
1018                | Self::OpenRouter
1019                | Self::Together
1020                | Self::Venice
1021                | Self::Ollama
1022                | Self::DeepSeek
1023                | Self::Custom(_)
1024        )
1025    }
1026
1027    /// Whether this provider requires an API key (Ollama/Local don't).
1028    pub fn requires_api_key(&self) -> bool {
1029        !matches!(self, Self::Ollama | Self::Local)
1030    }
1031}
1032
1033/// Parse a provider string into AiProvider.
1034pub fn parse_provider(name: &str) -> crate::RedDBResult<AiProvider> {
1035    match name.trim().to_ascii_lowercase().as_str() {
1036        "openai" => Ok(AiProvider::OpenAi),
1037        "anthropic" => Ok(AiProvider::Anthropic),
1038        "groq" => Ok(AiProvider::Groq),
1039        "openrouter" | "open_router" => Ok(AiProvider::OpenRouter),
1040        "together" => Ok(AiProvider::Together),
1041        "venice" => Ok(AiProvider::Venice),
1042        "ollama" => Ok(AiProvider::Ollama),
1043        "deepseek" | "deep_seek" => Ok(AiProvider::DeepSeek),
1044        "huggingface" | "hf" => Ok(AiProvider::HuggingFace),
1045        "local" => Ok(AiProvider::Local),
1046        other => {
1047            // Treat as custom provider if it looks like a URL
1048            if other.starts_with("http://") || other.starts_with("https://") {
1049                Ok(AiProvider::Custom(other.to_string()))
1050            } else {
1051                Err(crate::RedDBError::Query(format!(
1052                    "unsupported AI provider '{other}'; expected: openai, anthropic, groq, \
1053                     openrouter, together, venice, ollama, deepseek, huggingface, local"
1054                )))
1055            }
1056        }
1057    }
1058}
1059
1060/// Resolve the default AI provider. Checks:
1061/// 1. `REDDB_AI_PROVIDER` env var
1062/// 2. `red_config` KV key `red.config.ai.default.provider`
1063/// 3. Falls back to OpenAI
1064pub fn resolve_default_provider<F>(kv_getter: &F) -> AiProvider
1065where
1066    F: Fn(&str) -> crate::RedDBResult<Option<String>>,
1067{
1068    // 1. Env var
1069    if let Ok(value) = std::env::var("REDDB_AI_PROVIDER") {
1070        let value = value.trim().to_string();
1071        if !value.is_empty() {
1072            if let Ok(provider) = parse_provider(&value) {
1073                return provider;
1074            }
1075        }
1076    }
1077    // 2. KV store
1078    if let Ok(Some(value)) = kv_getter("red.config.ai.default.provider") {
1079        let value = value.trim().to_string();
1080        if !value.is_empty() {
1081            if let Ok(provider) = parse_provider(&value) {
1082                return provider;
1083            }
1084        }
1085    }
1086    AiProvider::OpenAi
1087}
1088
1089/// Resolve the default AI model. Checks:
1090/// 1. `REDDB_AI_MODEL` env var
1091/// 2. `red_config` KV key `red.config.ai.default.model`
1092/// 3. Falls back to provider's default
1093pub fn resolve_default_model<F>(provider: &AiProvider, kv_getter: &F) -> String
1094where
1095    F: Fn(&str) -> crate::RedDBResult<Option<String>>,
1096{
1097    // 1. Env var
1098    if let Ok(value) = std::env::var("REDDB_AI_MODEL") {
1099        let value = value.trim().to_string();
1100        if !value.is_empty() {
1101            return value;
1102        }
1103    }
1104    // 2. Provider-specific env var
1105    if let Ok(value) = std::env::var(provider.prompt_model_env_name()) {
1106        let value = value.trim().to_string();
1107        if !value.is_empty() {
1108            return value;
1109        }
1110    }
1111    // 3. KV store
1112    if let Ok(Some(value)) = kv_getter("red.config.ai.default.model") {
1113        let value = value.trim().to_string();
1114        if !value.is_empty() {
1115            return value;
1116        }
1117    }
1118    provider.default_prompt_model().to_string()
1119}
1120
1121/// Resolve default provider + model from runtime KV store.
1122pub fn resolve_defaults_from_runtime(
1123    runtime: &crate::runtime::RedDBRuntime,
1124) -> (AiProvider, String) {
1125    use crate::application::ports::RuntimeEntityPort;
1126    let kv_getter = |key: &str| -> crate::RedDBResult<Option<String>> {
1127        match runtime.get_kv("red_config", key)? {
1128            Some((crate::storage::schema::Value::Text(s), _)) => Ok(Some(s.to_string())),
1129            _ => Ok(None),
1130        }
1131    };
1132    let provider = resolve_default_provider(&kv_getter);
1133    let model = resolve_default_model(&provider, &kv_getter);
1134    (provider, model)
1135}
1136
1137/// Resolve default provider + model via RuntimeEntityPort trait (for use in QueryUseCases).
1138pub fn resolve_defaults_from_runtime_port<
1139    P: crate::application::ports::RuntimeEntityPort + ?Sized,
1140>(
1141    runtime: &P,
1142) -> (AiProvider, String) {
1143    let kv_getter = |key: &str| -> crate::RedDBResult<Option<String>> {
1144        match runtime.get_kv("red_config", key)? {
1145            Some((crate::storage::schema::Value::Text(s), _)) => Ok(Some(s.to_string())),
1146            _ => Ok(None),
1147        }
1148    };
1149    let provider = resolve_default_provider(&kv_getter);
1150    let model = resolve_default_model(&provider, &kv_getter);
1151    (provider, model)
1152}
1153
1154/// Resolve an API key for a provider. Uses the chain:
1155/// 1. Environment variable with alias: `REDDB_OPENAI_API_KEY_{ALIAS}`
1156/// 2. Vault secret lookup via `kv_getter` closure
1157/// 3. Legacy KV store lookup via `kv_getter` closure
1158/// 4. Default environment variable: `REDDB_OPENAI_API_KEY`
1159///
1160/// `kv_getter` receives either a `red.secret.*` path or a legacy `red.config.*`
1161/// key and returns the value if found.
1162pub fn resolve_api_key<F>(
1163    provider: &AiProvider,
1164    credential_alias: Option<&str>,
1165    kv_getter: F,
1166) -> crate::RedDBResult<String>
1167where
1168    F: Fn(&str) -> crate::RedDBResult<Option<String>>,
1169{
1170    // Providers that don't require API keys
1171    if !provider.requires_api_key() {
1172        // Still try to find a key (user may have one for auth'd Ollama)
1173        if let Ok(value) = std::env::var(provider.default_key_env_name()) {
1174            let value = value.trim().to_string();
1175            if !value.is_empty() {
1176                return Ok(value);
1177            }
1178        }
1179        return Ok(String::new());
1180    }
1181
1182    if let Some(alias) = credential_alias.map(str::trim).filter(|a| !a.is_empty()) {
1183        // Try env var with alias
1184        if let Some(key) = resolve_key_from_env_alias(provider, alias) {
1185            return Ok(key);
1186        }
1187        if let Some(key) = kv_getter(&ai_api_secret_path(provider, alias))? {
1188            if !key.trim().is_empty() {
1189                return Ok(key);
1190            }
1191        }
1192        if let Some(secret_ref) = kv_getter(&ai_api_secret_ref_config_key(provider, alias))? {
1193            if let Some(key) = kv_getter(secret_ref.trim())? {
1194                if !key.trim().is_empty() {
1195                    return Ok(key);
1196                }
1197            }
1198        }
1199        let legacy_key = ai_api_legacy_config_key(provider, alias);
1200        if let Some(key) = kv_getter(&legacy_key)? {
1201            if !key.trim().is_empty() {
1202                return Ok(key);
1203            }
1204        }
1205        return Err(crate::RedDBError::Query(format!(
1206            "credential '{alias}' not found for {}. Set env {} or store it in the vault",
1207            provider.token(),
1208            provider.alias_key_env_name(alias)
1209        )));
1210    }
1211
1212    // Default env var
1213    if let Ok(value) = std::env::var(provider.default_key_env_name()) {
1214        let value = value.trim().to_string();
1215        if !value.is_empty() {
1216            return Ok(value);
1217        }
1218    }
1219
1220    if let Some(key) = kv_getter(&ai_api_secret_path(provider, "default"))? {
1221        if !key.trim().is_empty() {
1222            return Ok(key);
1223        }
1224    }
1225    if let Some(secret_ref) = kv_getter(&ai_api_secret_ref_config_key(provider, "default"))? {
1226        if let Some(key) = kv_getter(secret_ref.trim())? {
1227            if !key.trim().is_empty() {
1228                return Ok(key);
1229            }
1230        }
1231    }
1232    if let Some(key) = kv_getter(&ai_api_legacy_config_key(provider, "default"))? {
1233        if !key.trim().is_empty() {
1234            return Ok(key);
1235        }
1236    }
1237
1238    let legacy_short_key = format!("{}/default", provider.token());
1239    if let Some(key) = kv_getter(&legacy_short_key)? {
1240        if !key.trim().is_empty() {
1241            return Ok(key);
1242        }
1243    }
1244
1245    Err(crate::RedDBError::Query(format!(
1246        "missing {} API key. Set {} or provide credential alias",
1247        provider.token(),
1248        provider.default_key_env_name()
1249    )))
1250}
1251
1252pub fn ai_api_secret_path(provider: &AiProvider, alias: &str) -> String {
1253    format!(
1254        "red.secret.ai.{}.{}.api_key",
1255        provider.token(),
1256        normalize_credential_alias_path(alias)
1257    )
1258}
1259
1260pub fn ai_api_secret_ref_config_key(provider: &AiProvider, alias: &str) -> String {
1261    format!(
1262        "red.config.ai.{}.{}.secret_ref",
1263        provider.token(),
1264        normalize_credential_alias_path(alias)
1265    )
1266}
1267
1268pub fn ai_api_legacy_config_key(provider: &AiProvider, alias: &str) -> String {
1269    format!(
1270        "red.config.ai.{}.{}.key",
1271        provider.token(),
1272        normalize_credential_alias_path(alias)
1273    )
1274}
1275
/// Normalises a credential alias for use as a key-path segment.
///
/// The alias is trimmed and ASCII-lowercased; a blank alias becomes
/// `"default"`.
fn normalize_credential_alias_path(alias: &str) -> String {
    match alias.trim() {
        "" => String::from("default"),
        trimmed => trimmed.to_ascii_lowercase(),
    }
}
1284
1285fn resolve_key_from_env_alias(provider: &AiProvider, alias: &str) -> Option<String> {
1286    let env_name = provider.alias_key_env_name(alias);
1287    std::env::var(env_name)
1288        .ok()
1289        .map(|v| v.trim().to_string())
1290        .filter(|v| !v.is_empty())
1291}
1292
/// Converts a credential alias into an env-var-safe suffix.
///
/// ASCII letters and digits are kept (upper-cased); every run of other
/// characters collapses into a single `_`, and no `_` is left at either
/// end. An alias without any ASCII alphanumerics yields an empty string.
fn normalize_alias_token(alias: &str) -> String {
    // Splitting on the separators and re-joining the non-empty pieces gives
    // exactly one underscore between words and none at the edges.
    alias
        .split(|character: char| !character.is_ascii_alphanumeric())
        .filter(|word| !word.is_empty())
        .map(str::to_ascii_uppercase)
        .collect::<Vec<String>>()
        .join("_")
}
1307
1308/// Convenience: resolve API key using a RedDBRuntime's KV store.
1309pub fn resolve_api_key_from_runtime(
1310    provider: &AiProvider,
1311    credential_alias: Option<&str>,
1312    runtime: &crate::runtime::RedDBRuntime,
1313) -> crate::RedDBResult<String> {
1314    use crate::application::ports::RuntimeEntityPort;
1315    resolve_api_key(provider, credential_alias, |kv_key| {
1316        if kv_key.starts_with("red.secret.") {
1317            return Ok(runtime.vault_kv_get(kv_key));
1318        }
1319        match runtime.get_kv("red_config", kv_key)? {
1320            Some((crate::storage::schema::Value::Text(secret), _)) => Ok(Some(secret.to_string())),
1321            Some(_) => Ok(None),
1322            None => Ok(None),
1323        }
1324    })
1325}
1326
1327// ============================================================================
1328// HuggingFace Inference API
1329// ============================================================================
1330
1331/// Generate embeddings via HuggingFace Inference API.
1332pub fn huggingface_embeddings(
1333    api_key: &str,
1334    model: &str,
1335    inputs: &[String],
1336    api_base: &str,
1337) -> crate::RedDBResult<OpenAiEmbeddingResponse> {
1338    let url = format!("{api_base}/pipeline/feature-extraction/{model}");
1339    let mut embeddings = Vec::with_capacity(inputs.len());
1340
1341    for input in inputs {
1342        let payload = crate::serde_json::json!({ "inputs": input }).to_string_compact();
1343        let (status, body_str) = http_post_json(&url, api_key, &[], payload, 90)
1344            .map_err(|e| crate::RedDBError::Query(format!("HuggingFace API error: {e}")))?;
1345        if !(200..300).contains(&status) {
1346            return Err(crate::RedDBError::Query(format!(
1347                "HuggingFace API error (status {status}): {body_str}"
1348            )));
1349        }
1350        let body: JsonValue = crate::serde_json::from_str(&body_str).map_err(|e| {
1351            crate::RedDBError::Query(format!("HuggingFace response parse error: {e}"))
1352        })?;
1353
1354        // HF returns [[f32, ...]] for single input
1355        let vector: Vec<f32> = match &body {
1356            JsonValue::Array(outer) => outer
1357                .iter()
1358                .filter_map(|v| v.as_f64().map(|n| n as f32))
1359                .collect(),
1360            _ => {
1361                return Err(crate::RedDBError::Query(
1362                    "unexpected HuggingFace embedding response format".to_string(),
1363                ))
1364            }
1365        };
1366        embeddings.push(vector);
1367    }
1368
1369    Ok(OpenAiEmbeddingResponse {
1370        provider: "huggingface",
1371        model: model.to_string(),
1372        embeddings,
1373        prompt_tokens: None,
1374        total_tokens: None,
1375    })
1376}
1377
1378/// Generate text via HuggingFace Inference API.
1379pub fn huggingface_prompt(
1380    api_key: &str,
1381    model: &str,
1382    prompt: &str,
1383    temperature: Option<f32>,
1384    max_tokens: Option<usize>,
1385    api_base: &str,
1386) -> crate::RedDBResult<AiPromptResponse> {
1387    let url = format!("{api_base}/models/{model}");
1388    let mut params = Map::new();
1389    if let Some(t) = temperature {
1390        params.insert("temperature".into(), JsonValue::Number(t as f64));
1391    }
1392    params.insert(
1393        "max_new_tokens".into(),
1394        JsonValue::Number(max_tokens.unwrap_or(512) as f64),
1395    );
1396    let payload = crate::serde_json::json!({
1397        "inputs": prompt,
1398        "parameters": JsonValue::Object(params)
1399    });
1400
1401    let (status, body_str) =
1402        http_post_json(&url, api_key, &[], payload.to_string_compact(), 120)
1403            .map_err(|e| crate::RedDBError::Query(format!("HuggingFace API error: {e}")))?;
1404    if !(200..300).contains(&status) {
1405        return Err(crate::RedDBError::Query(format!(
1406            "HuggingFace API error (status {status}): {body_str}"
1407        )));
1408    }
1409    let body: JsonValue = crate::serde_json::from_str(&body_str)
1410        .map_err(|e| crate::RedDBError::Query(format!("HuggingFace response parse error: {e}")))?;
1411
1412    let output_text = match &body {
1413        JsonValue::Array(arr) => arr
1414            .first()
1415            .and_then(|v| v.get("generated_text"))
1416            .and_then(JsonValue::as_str)
1417            .unwrap_or("")
1418            .to_string(),
1419        _ => body
1420            .get("generated_text")
1421            .and_then(JsonValue::as_str)
1422            .unwrap_or("")
1423            .to_string(),
1424    };
1425
1426    Ok(AiPromptResponse {
1427        provider: "huggingface",
1428        model: model.to_string(),
1429        output_text,
1430        prompt_tokens: None,
1431        completion_tokens: None,
1432        total_tokens: None,
1433        stop_reason: None,
1434    })
1435}
1436
1437// ============================================================================
1438// Local model stubs (requires 'local-models' feature flag)
1439// ============================================================================
1440
1441/// Local embedding via candle — requires `local-models` feature.
1442pub fn local_embeddings(
1443    _model_id: &str,
1444    _texts: &[String],
1445) -> crate::RedDBResult<OpenAiEmbeddingResponse> {
1446    Err(crate::RedDBError::FeatureNotEnabled(
1447        "local model inference requires the 'local-models' feature flag. \
1448         Build with: cargo build --features local-models. \
1449         Alternatively, use 'ollama' provider with a local Ollama server."
1450            .to_string(),
1451    ))
1452}
1453
1454/// Local prompt via candle — requires `local-models` feature.
1455pub fn local_prompt(_model_id: &str, _prompt: &str) -> crate::RedDBResult<AiPromptResponse> {
1456    Err(crate::RedDBError::FeatureNotEnabled(
1457        "local model inference requires the 'local-models' feature flag. \
1458         Build with: cargo build --features local-models. \
1459         Alternatively, use 'ollama' provider with a local Ollama server."
1460            .to_string(),
1461    ))
1462}
1463
1464// ============================================================================
1465// gRPC input collection — parity with HTTP /ai/embeddings
1466// ============================================================================
1467
1468/// Collect embedding inputs from any of the three supported shapes.
1469///
1470/// * `input: "..."` — single string.
1471/// * `inputs: ["...", ...]` — array of strings.
1472/// * `source_query: "SELECT ..."` — runs a SQL query and projects
1473///   either the named `source_field` from each row (source_mode =
1474///   "row", default) or every string cell of every result row
1475///   (source_mode = "result").
1476fn grpc_collect_embedding_inputs(
1477    runtime: &crate::runtime::RedDBRuntime,
1478    payload: &JsonValue,
1479) -> crate::RedDBResult<Vec<String>> {
1480    if let Some(source_query) = payload
1481        .get("source_query")
1482        .and_then(|v| v.as_str())
1483        .map(str::trim)
1484        .filter(|s| !s.is_empty())
1485    {
1486        return grpc_collect_inputs_from_source_query(runtime, payload, source_query);
1487    }
1488
1489    if let Some(arr) = payload.get("inputs").and_then(|v| v.as_array()) {
1490        let mut out = Vec::with_capacity(arr.len());
1491        for (idx, v) in arr.iter().enumerate() {
1492            let text = v.as_str().ok_or_else(|| {
1493                crate::RedDBError::Query(format!("field 'inputs[{idx}]' must be a string"))
1494            })?;
1495            if text.trim().is_empty() {
1496                return Err(crate::RedDBError::Query(format!(
1497                    "field 'inputs[{idx}]' cannot be empty"
1498                )));
1499            }
1500            out.push(text.to_string());
1501        }
1502        if out.is_empty() {
1503            return Err(crate::RedDBError::Query(
1504                "field 'inputs' must be a non-empty array of strings".to_string(),
1505            ));
1506        }
1507        return Ok(out);
1508    }
1509
1510    if let Some(single) = payload
1511        .get("input")
1512        .and_then(|v| v.as_str())
1513        .map(str::trim)
1514        .filter(|s| !s.is_empty())
1515    {
1516        return Ok(vec![single.to_string()]);
1517    }
1518
1519    Err(crate::RedDBError::Query(
1520        "provide either 'input', 'inputs', or 'source_query'".to_string(),
1521    ))
1522}
1523
1524fn grpc_collect_inputs_from_source_query(
1525    runtime: &crate::runtime::RedDBRuntime,
1526    payload: &JsonValue,
1527    source_query: &str,
1528) -> crate::RedDBResult<Vec<String>> {
1529    let result = runtime
1530        .execute_query(source_query)
1531        .map_err(|err| crate::RedDBError::Query(format!("source_query failed: {err}")))?;
1532
1533    let source_mode = payload
1534        .get("source_mode")
1535        .and_then(|v| v.as_str())
1536        .map(str::trim)
1537        .filter(|s| !s.is_empty())
1538        .unwrap_or("row")
1539        .to_ascii_lowercase();
1540
1541    let mut out: Vec<String> = Vec::new();
1542    match source_mode.as_str() {
1543        "row" => {
1544            let field = payload
1545                .get("source_field")
1546                .and_then(|v| v.as_str())
1547                .map(str::trim)
1548                .filter(|s| !s.is_empty())
1549                .ok_or_else(|| {
1550                    crate::RedDBError::Query(
1551                        "field 'source_field' is required when source_mode='row'".to_string(),
1552                    )
1553                })?;
1554            for rec in &result.result.records {
1555                for (key, value) in rec.iter_fields() {
1556                    if key.as_ref() == field {
1557                        if let crate::storage::schema::Value::Text(text) = value {
1558                            let trimmed = text.trim();
1559                            if !trimmed.is_empty() {
1560                                out.push(trimmed.to_string());
1561                            }
1562                        }
1563                    }
1564                }
1565            }
1566        }
1567        "result" => {
1568            for rec in &result.result.records {
1569                for (_, value) in rec.iter_fields() {
1570                    if let crate::storage::schema::Value::Text(text) = value {
1571                        let trimmed = text.trim();
1572                        if !trimmed.is_empty() {
1573                            out.push(trimmed.to_string());
1574                        }
1575                    }
1576                }
1577            }
1578        }
1579        other => {
1580            return Err(crate::RedDBError::Query(format!(
1581                "field 'source_mode' must be 'row' or 'result' (got '{other}')"
1582            )));
1583        }
1584    }
1585
1586    if out.is_empty() {
1587        return Err(crate::RedDBError::Query(
1588            "source_query produced zero non-empty text inputs".to_string(),
1589        ));
1590    }
1591    Ok(out)
1592}
1593
1594// ============================================================================
1595// gRPC stubs — delegate to the same logic as HTTP handlers
1596// ============================================================================
1597
1598/// gRPC embeddings — shared entrypoint that mirrors the HTTP handler.
1599///
1600/// Accepts the same JSON payload shape as `POST /ai/embeddings`:
1601///
1602/// ```json
1603/// { "provider": "openai", "model": "text-embedding-3-small",
1604///   "inputs": ["hello", "world"], "credential": "optional-alias" }
1605/// ```
1606///
1607/// Input shapes at parity with HTTP: `input` (single string),
1608/// `inputs` (array of strings), and `source_query` (SQL that the
1609/// runtime executes to materialise the input texts; `source_mode`
1610/// = `row` needs `source_collection` + `source_field`, `result`
1611/// uses the projected columns). Returns a JSON object with
1612/// `provider`, `model`, `embeddings`, `prompt_tokens`,
1613/// `total_tokens`. Non-OpenAI-compatible providers are rejected
1614/// with a clear message, matching the HTTP handler.
1615pub fn grpc_embeddings(
1616    runtime: &crate::runtime::RedDBRuntime,
1617    payload: &JsonValue,
1618) -> crate::RedDBResult<JsonValue> {
1619    let provider_name = payload
1620        .get("provider")
1621        .and_then(|v| v.as_str())
1622        .map(str::trim)
1623        .filter(|s| !s.is_empty())
1624        .unwrap_or("openai");
1625    let provider = parse_provider(provider_name)?;
1626    // Routing matrix mirrors `handle_ai_embeddings`. See that function
1627    // for the rationale; in short: HuggingFace gets its own wire
1628    // shape, Anthropic fails fast (no embeddings product), and Local
1629    // requires a build-time feature flag.
1630    match &provider {
1631        AiProvider::Anthropic => {
1632            return Err(crate::RedDBError::Query(
1633                "Anthropic does not offer an embeddings API. \
1634                 Re-issue the request against an OpenAI-compatible \
1635                 provider (openai, groq, ollama, openrouter, together, \
1636                 venice, deepseek), HuggingFace, or a custom base URL — \
1637                 RedDB does not silently route embeddings to a \
1638                 different provider than the one you named."
1639                    .to_string(),
1640            ));
1641        }
1642        AiProvider::Local => {
1643            return Err(crate::RedDBError::Query(
1644                "Local embeddings require the `local-models` feature \
1645                 flag at engine build time."
1646                    .to_string(),
1647            ));
1648        }
1649        _ => {}
1650    }
1651
1652    let inputs: Vec<String> = grpc_collect_embedding_inputs(runtime, payload)?;
1653
1654    let model = payload
1655        .get("model")
1656        .and_then(|v| v.as_str())
1657        .map(str::trim)
1658        .filter(|s| !s.is_empty())
1659        .map(str::to_string)
1660        .or_else(|| {
1661            std::env::var(format!(
1662                "REDDB_{}_EMBEDDING_MODEL",
1663                provider.token().to_ascii_uppercase()
1664            ))
1665            .ok()
1666        })
1667        .or_else(|| std::env::var("REDDB_OPENAI_EMBEDDING_MODEL").ok())
1668        .filter(|v| !v.trim().is_empty())
1669        .unwrap_or_else(|| provider.default_embedding_model().to_string());
1670
1671    let credential = payload
1672        .get("credential")
1673        .and_then(|v| v.as_str())
1674        .map(str::to_string);
1675    let api_key = resolve_api_key_from_runtime(&provider, credential.as_deref(), runtime)?;
1676
1677    let dimensions = payload
1678        .get("dimensions")
1679        .and_then(|v| v.as_i64())
1680        .and_then(|v| usize::try_from(v).ok())
1681        .filter(|v| *v > 0);
1682
1683    let response = match &provider {
1684        AiProvider::HuggingFace => {
1685            huggingface_embeddings(&api_key, &model, &inputs, &provider.resolve_api_base())?
1686        }
1687        _ => {
1688            let transport = crate::runtime::ai::transport::AiTransport::from_runtime(runtime);
1689            let request = OpenAiEmbeddingRequest {
1690                api_key,
1691                model,
1692                inputs,
1693                dimensions,
1694                api_base: provider.resolve_api_base(),
1695            };
1696            crate::runtime::ai::block_on_ai(async move {
1697                openai_embeddings_async(&transport, request).await
1698            })
1699            .and_then(|result| result)?
1700        }
1701    };
1702
1703    let embeddings_json: Vec<JsonValue> = response
1704        .embeddings
1705        .into_iter()
1706        .map(|vec| {
1707            JsonValue::Array(
1708                vec.into_iter()
1709                    .map(|f| JsonValue::Number(f as f64))
1710                    .collect(),
1711            )
1712        })
1713        .collect();
1714
1715    let mut obj = Map::new();
1716    obj.insert(
1717        "provider".to_string(),
1718        JsonValue::String(response.provider.to_string()),
1719    );
1720    obj.insert("model".to_string(), JsonValue::String(response.model));
1721    obj.insert("embeddings".to_string(), JsonValue::Array(embeddings_json));
1722    if let Some(pt) = response.prompt_tokens {
1723        obj.insert("prompt_tokens".to_string(), JsonValue::Number(pt as f64));
1724    }
1725    if let Some(tt) = response.total_tokens {
1726        obj.insert("total_tokens".to_string(), JsonValue::Number(tt as f64));
1727    }
1728    Ok(JsonValue::Object(obj))
1729}
1730
1731/// gRPC stub for AI prompt.
1732pub fn grpc_prompt(
1733    _runtime: &crate::runtime::RedDBRuntime,
1734    _payload: &JsonValue,
1735) -> crate::RedDBResult<JsonValue> {
1736    Err(crate::RedDBError::FeatureNotEnabled(
1737        "AI prompt via gRPC requires HTTP endpoint; use POST /ai/prompt".to_string(),
1738    ))
1739}
1740
1741/// gRPC stub for AI credentials.
1742pub fn grpc_credentials(
1743    _runtime: &crate::runtime::RedDBRuntime,
1744    _payload: &JsonValue,
1745) -> crate::RedDBResult<JsonValue> {
1746    Err(crate::RedDBError::FeatureNotEnabled(
1747        "AI credentials via gRPC requires HTTP endpoint; use POST /ai/credentials".to_string(),
1748    ))
1749}