1use std::io::BufRead;
7use std::time::Duration;
8
9use crate::json::{parse_json, Map, Value as JsonValue};
10use crate::{RedDBError, RedDBResult};
11
12fn http_post_json(
18 url: &str,
19 api_key: &str,
20 extra_headers: &[(&str, &str)],
21 payload: String,
22 read_timeout_secs: u64,
23) -> Result<(u16, String), String> {
24 let agent: ureq::Agent = ureq::Agent::config_builder()
25 .timeout_connect(Some(Duration::from_secs(10)))
26 .timeout_send_request(Some(Duration::from_secs(30)))
27 .timeout_recv_response(Some(Duration::from_secs(read_timeout_secs)))
28 .timeout_recv_body(Some(Duration::from_secs(read_timeout_secs)))
29 .http_status_as_error(false)
30 .build()
31 .into();
32
33 let mut req = agent
34 .post(url)
35 .header("Content-Type", "application/json")
36 .header("Accept", "application/json");
37 for (k, v) in extra_headers {
38 req = req.header(*k, *v);
39 }
40 let trimmed_key = api_key.trim();
41 if !trimmed_key.is_empty() {
42 req = req.header("Authorization", &format!("Bearer {}", trimmed_key));
43 }
44
45 match req.send(payload) {
46 Ok(mut resp) => {
47 let status = resp.status().as_u16();
48 let body = resp
49 .body_mut()
50 .read_to_string()
51 .map_err(|err| format!("failed to read response body: {err}"))?;
52 Ok((status, body))
53 }
54 Err(err) => Err(format!("{err}")),
55 }
56}
57
/// Default OpenAI embedding model name; also used as the reported model when
/// an embeddings response carries no `"model"` field.
pub const DEFAULT_OPENAI_EMBEDDING_MODEL: &str = "text-embedding-3-small";
/// Default OpenAI API base URL (no trailing slash); endpoint paths such as
/// `/embeddings` are appended to a request's `api_base`.
pub const DEFAULT_OPENAI_API_BASE: &str = "https://api.openai.com/v1";
/// Default OpenAI chat model name.
pub const DEFAULT_OPENAI_PROMPT_MODEL: &str = "gpt-4.1-mini";
/// Default Anthropic model name.
pub const DEFAULT_ANTHROPIC_PROMPT_MODEL: &str = "claude-3-5-haiku-latest";
/// Default Anthropic API base URL (no trailing slash).
pub const DEFAULT_ANTHROPIC_API_BASE: &str = "https://api.anthropic.com/v1";
/// Presumably the default value for the `anthropic-version` request header
/// (see `AnthropicPromptRequest::anthropic_version`) — confirm against callers.
pub const DEFAULT_ANTHROPIC_VERSION: &str = "2023-06-01";
64
/// Parameters for an OpenAI-style `/embeddings` call.
#[derive(Debug, Clone)]
pub struct OpenAiEmbeddingRequest {
    /// Bearer token; trimmed before use, and no `Authorization` header is sent
    /// when it is blank.
    pub api_key: String,
    /// Model name placed in the payload's `"model"` field; must not be blank.
    pub model: String,
    /// Texts to embed; at least one is required. A single input is serialized
    /// as a JSON string, several as a JSON array.
    pub inputs: Vec<String>,
    /// Optional value for the payload's `"dimensions"` field.
    pub dimensions: Option<usize>,
    /// Base URL; trailing slashes are stripped before `/embeddings` is appended.
    pub api_base: String,
}
73
/// Parsed result of an embeddings call.
#[derive(Debug, Clone)]
pub struct OpenAiEmbeddingResponse {
    /// Provider tag; the parser in this file always sets `"openai"`.
    pub provider: &'static str,
    /// Model reported by the response, or `DEFAULT_OPENAI_EMBEDDING_MODEL`
    /// when the response has no `"model"` string.
    pub model: String,
    /// One vector per returned item, ordered by the item's `"index"`.
    pub embeddings: Vec<Vec<f32>>,
    /// `usage.prompt_tokens` when present and non-negative.
    pub prompt_tokens: Option<u64>,
    /// `usage.total_tokens` when present and non-negative.
    pub total_tokens: Option<u64>,
}
82
/// Parameters for an OpenAI-style `/chat/completions` call with a single user
/// message.
#[derive(Debug, Clone)]
pub struct OpenAiPromptRequest {
    /// Bearer token for the `Authorization` header.
    pub api_key: String,
    /// Model name placed in the payload's `"model"` field; must not be blank.
    pub model: String,
    /// Text of the single `"user"` message; must not be blank.
    pub prompt: String,
    /// Optional `"temperature"` payload value.
    pub temperature: Option<f32>,
    /// Optional `"seed"` payload value.
    pub seed: Option<u64>,
    /// Optional limit, sent as the payload's `"max_tokens"` field.
    pub max_output_tokens: Option<usize>,
    /// Base URL; trailing slashes are stripped before `/chat/completions` is
    /// appended.
    pub api_base: String,
    /// Honoured by `openai_prompt_async` only: `openai_prompt` always sends a
    /// non-streaming request and `openai_prompt_streaming` always streams.
    pub stream: bool,
}
94
/// Parameters for an Anthropic `/messages` call with a single user message.
#[derive(Debug, Clone)]
pub struct AnthropicPromptRequest {
    /// Key sent in the `x-api-key` header; must not be blank.
    pub api_key: String,
    /// Model name placed in the payload's `"model"` field; must not be blank.
    pub model: String,
    /// Text of the single `"user"` message; must not be blank.
    pub prompt: String,
    /// Optional `"temperature"` payload value.
    pub temperature: Option<f32>,
    /// Value for the payload's `"max_tokens"` field; 512 is sent when `None`.
    pub max_output_tokens: Option<usize>,
    /// Base URL; trailing slashes are stripped before `/messages` is appended.
    pub api_base: String,
    /// Value sent in the `anthropic-version` header.
    pub anthropic_version: String,
}
105
/// Provider-independent result of a prompt call.
#[derive(Debug, Clone)]
pub struct AiPromptResponse {
    /// Provider tag; the parsers in this file set `"openai"` or `"anthropic"`.
    pub provider: &'static str,
    /// Model reported by the response, falling back to the requested model.
    pub model: String,
    /// Complete generated text.
    pub output_text: String,
    /// Individual streamed text deltas in arrival order; `None` for
    /// non-streaming responses.
    pub output_chunks: Option<Vec<String>>,
    /// Input token count when the provider reported one.
    pub prompt_tokens: Option<u64>,
    /// Output token count when the provider reported one.
    pub completion_tokens: Option<u64>,
    /// Reported total, or the saturating sum of prompt and completion tokens
    /// when both are known and no total was reported.
    pub total_tokens: Option<u64>,
    /// Provider's finish/stop reason string, if any.
    pub stop_reason: Option<String>,
}
117
118#[deprecated(
119 since = "1.0.0",
120 note = "use AiBatchClient::embed_batch for embeddings or openai_embeddings_async with AiTransport when token usage metadata is required"
121)]
122pub fn openai_embeddings(request: OpenAiEmbeddingRequest) -> RedDBResult<OpenAiEmbeddingResponse> {
123 if request.model.trim().is_empty() {
124 return Err(RedDBError::Query(
125 "OpenAI embedding model cannot be empty".to_string(),
126 ));
127 }
128 if request.inputs.is_empty() {
129 return Err(RedDBError::Query(
130 "at least one input is required for embeddings".to_string(),
131 ));
132 }
133
134 let url = format!("{}/embeddings", request.api_base.trim_end_matches('/'));
135 let payload =
136 build_openai_embedding_payload(&request.model, &request.inputs, request.dimensions);
137
138 let (status, body) = http_post_json(&url, &request.api_key, &[], payload, 90)
139 .map_err(|err| RedDBError::Query(format!("OpenAI transport error: {err}")))?;
140
141 if !(200..300).contains(&status) {
142 let message = openai_error_message(&body)
143 .unwrap_or_else(|| "OpenAI embeddings request failed".to_string());
144 return Err(RedDBError::Query(format!(
145 "OpenAI embeddings request failed (status {status}): {message}"
146 )));
147 }
148
149 parse_openai_embedding_response(&body)
150}
151
152#[deprecated(since = "1.0.0", note = "use openai_prompt_async with AiTransport")]
153pub fn openai_prompt(request: OpenAiPromptRequest) -> RedDBResult<AiPromptResponse> {
154 if request.model.trim().is_empty() {
155 return Err(RedDBError::Query(
156 "OpenAI prompt model cannot be empty".to_string(),
157 ));
158 }
159 if request.prompt.trim().is_empty() {
160 return Err(RedDBError::Query("prompt cannot be empty".to_string()));
161 }
162
163 let url = format!(
164 "{}/chat/completions",
165 request.api_base.trim_end_matches('/')
166 );
167 let payload = build_openai_prompt_payload(
168 &request.model,
169 &request.prompt,
170 request.temperature,
171 request.seed,
172 request.max_output_tokens,
173 false,
174 );
175
176 let (status, body) = http_post_json(&url, &request.api_key, &[], payload, 120)
177 .map_err(|err| RedDBError::Query(format!("OpenAI transport error: {err}")))?;
178
179 if !(200..300).contains(&status) {
180 let message = openai_error_message(&body)
181 .unwrap_or_else(|| "OpenAI prompt request failed".to_string());
182 return Err(RedDBError::Query(format!(
183 "OpenAI prompt request failed (status {status}): {message}"
184 )));
185 }
186
187 parse_openai_prompt_response(&body, &request.model)
188}
189
190#[deprecated(since = "1.0.0", note = "use anthropic_prompt_async with AiTransport")]
191pub fn anthropic_prompt(request: AnthropicPromptRequest) -> RedDBResult<AiPromptResponse> {
192 if request.api_key.trim().is_empty() {
193 return Err(RedDBError::Query(
194 "Anthropic API key cannot be empty".to_string(),
195 ));
196 }
197 if request.model.trim().is_empty() {
198 return Err(RedDBError::Query(
199 "Anthropic model cannot be empty".to_string(),
200 ));
201 }
202 if request.prompt.trim().is_empty() {
203 return Err(RedDBError::Query("prompt cannot be empty".to_string()));
204 }
205
206 let url = format!("{}/messages", request.api_base.trim_end_matches('/'));
207 let payload = build_anthropic_prompt_payload(
208 &request.model,
209 &request.prompt,
210 request.temperature,
211 request.max_output_tokens,
212 );
213
214 let extra = [
219 ("x-api-key", request.api_key.as_str()),
220 ("anthropic-version", request.anthropic_version.as_str()),
221 ];
222 let (status, body) = http_post_json(&url, "", &extra, payload, 120)
223 .map_err(|err| RedDBError::Query(format!("Anthropic transport error: {err}")))?;
224
225 if !(200..300).contains(&status) {
226 let message = anthropic_error_message(&body)
227 .unwrap_or_else(|| "Anthropic prompt request failed".to_string());
228 return Err(RedDBError::Query(format!(
229 "Anthropic prompt request failed (status {status}): {message}"
230 )));
231 }
232
233 parse_anthropic_prompt_response(&body, &request.model)
234}
235
236pub async fn openai_embeddings_async(
241 transport: &crate::runtime::ai::transport::AiTransport,
242 request: OpenAiEmbeddingRequest,
243) -> RedDBResult<OpenAiEmbeddingResponse> {
244 if request.model.trim().is_empty() {
245 return Err(RedDBError::Query(
246 "OpenAI embedding model cannot be empty".to_string(),
247 ));
248 }
249 if request.inputs.is_empty() {
250 return Err(RedDBError::Query(
251 "at least one input is required for embeddings".to_string(),
252 ));
253 }
254
255 let url = format!("{}/embeddings", request.api_base.trim_end_matches('/'));
256 let payload =
257 build_openai_embedding_payload(&request.model, &request.inputs, request.dimensions);
258 let mut http_req =
259 crate::runtime::ai::transport::AiHttpRequest::post_json("openai-compatible", url, payload);
260 let trimmed_key = request.api_key.trim();
261 if !trimmed_key.is_empty() {
262 http_req = http_req.header("authorization", format!("Bearer {}", trimmed_key));
263 }
264
265 let response = transport
266 .request(http_req)
267 .await
268 .map_err(|e| RedDBError::Query(e.to_string()))?;
269
270 parse_openai_embedding_response(&response.body)
271}
272
273pub async fn openai_prompt_async(
278 transport: &crate::runtime::ai::transport::AiTransport,
279 request: OpenAiPromptRequest,
280) -> RedDBResult<AiPromptResponse> {
281 if request.model.trim().is_empty() {
282 return Err(RedDBError::Query(
283 "OpenAI prompt model cannot be empty".to_string(),
284 ));
285 }
286 if request.prompt.trim().is_empty() {
287 return Err(RedDBError::Query("prompt cannot be empty".to_string()));
288 }
289
290 let url = format!(
291 "{}/chat/completions",
292 request.api_base.trim_end_matches('/')
293 );
294 let payload = build_openai_prompt_payload(
295 &request.model,
296 &request.prompt,
297 request.temperature,
298 request.seed,
299 request.max_output_tokens,
300 request.stream,
301 );
302 let http_req = crate::runtime::ai::transport::AiHttpRequest::post_json("openai", url, payload)
303 .model(request.model.clone())
304 .header("authorization", format!("Bearer {}", request.api_key));
305
306 let response = transport
307 .request(http_req)
308 .await
309 .map_err(|e| RedDBError::Query(e.to_string()))?;
310
311 if request.stream {
312 parse_openai_streaming_prompt_response(&response.body, &request.model)
313 } else {
314 parse_openai_prompt_response(&response.body, &request.model)
315 }
316}
317
/// Blocking, streaming chat completion against `<api_base>/chat/completions`.
///
/// The request is always sent with streaming enabled (the request's `stream`
/// flag is not consulted). The response is read line by line as server-sent
/// events; every non-empty text delta is passed to `on_chunk` as soon as it is
/// decoded and also collected for the final response.
///
/// # Errors
///
/// Returns `RedDBError::Query` when the model or prompt is blank, on transport
/// failure, on a non-2xx status, when the stream cannot be read, when a
/// `data:` line is not valid JSON, or when the stream yields no text at all.
/// An error returned by `on_chunk` aborts the read and is propagated as-is.
pub fn openai_prompt_streaming(
    request: OpenAiPromptRequest,
    mut on_chunk: impl FnMut(&str) -> RedDBResult<()>,
) -> RedDBResult<AiPromptResponse> {
    if request.model.trim().is_empty() {
        return Err(RedDBError::Query(
            "OpenAI prompt model cannot be empty".to_string(),
        ));
    }
    if request.prompt.trim().is_empty() {
        return Err(RedDBError::Query("prompt cannot be empty".to_string()));
    }

    let url = format!(
        "{}/chat/completions",
        request.api_base.trim_end_matches('/')
    );
    // Streaming is forced on regardless of `request.stream`.
    let payload = build_openai_prompt_payload(
        &request.model,
        &request.prompt,
        request.temperature,
        request.seed,
        request.max_output_tokens,
        true,
    );

    // Same timeouts as the non-streaming prompt path (120s receive timeouts);
    // statuses are inspected manually below instead of being turned into errors.
    let agent: ureq::Agent = ureq::Agent::config_builder()
        .timeout_connect(Some(Duration::from_secs(10)))
        .timeout_send_request(Some(Duration::from_secs(30)))
        .timeout_recv_response(Some(Duration::from_secs(120)))
        .timeout_recv_body(Some(Duration::from_secs(120)))
        .http_status_as_error(false)
        .build()
        .into();

    let mut req = agent
        .post(&url)
        .header("content-type", "application/json")
        .header("accept", "text/event-stream");
    // No authorization header is sent for a blank key.
    let trimmed_key = request.api_key.trim();
    if !trimmed_key.is_empty() {
        req = req.header("authorization", &format!("Bearer {}", trimmed_key));
    }

    let mut response = req
        .send(payload)
        .map_err(|err| RedDBError::Query(format!("OpenAI transport error: {err}")))?;
    let status = response.status().as_u16();
    if !(200..300).contains(&status) {
        // Best effort: a body read failure becomes the text that is searched
        // for an error message, so the status is still reported.
        let body = response
            .body_mut()
            .read_to_string()
            .unwrap_or_else(|err| format!("failed to read response body: {err}"));
        let message = openai_error_message(&body)
            .unwrap_or_else(|| "OpenAI prompt request failed".to_string());
        return Err(RedDBError::Query(format!(
            "OpenAI prompt request failed (status {status}): {message}"
        )));
    }

    // Accumulators; the requested model is the fallback until an event
    // reports one.
    let mut model = request.model;
    let mut chunks = Vec::new();
    let mut prompt_tokens = None;
    let mut completion_tokens = None;
    let mut total_tokens = None;
    let mut stop_reason = None;

    let mut reader = std::io::BufReader::new(response.body_mut().as_reader());
    // One line buffer reused for the whole stream.
    let mut line = String::new();
    loop {
        line.clear();
        let read = reader.read_line(&mut line).map_err(|err| {
            RedDBError::Query(format!("failed to read OpenAI streaming response: {err}"))
        })?;
        if read == 0 {
            // End of stream without an explicit `[DONE]` marker.
            break;
        }

        // Only `data:` lines are processed; everything else is skipped.
        let trimmed = line.trim();
        let Some(data) = trimmed.strip_prefix("data:") else {
            continue;
        };
        let data = data.trim();
        if data.is_empty() {
            continue;
        }
        if data == "[DONE]" {
            break;
        }

        let parsed = parse_json(data).map_err(|err| {
            RedDBError::Query(format!(
                "invalid OpenAI streaming prompt JSON response: {err}"
            ))
        })?;
        let json = JsonValue::from(parsed);
        if let Some(value) = json.get("model").and_then(JsonValue::as_str) {
            model = value.to_string();
        }
        // Each counter keeps its previous value when this event does not
        // carry a usable (non-negative integer) replacement.
        if let Some(usage) = json.get("usage") {
            prompt_tokens = usage
                .get("prompt_tokens")
                .and_then(JsonValue::as_i64)
                .and_then(|value| u64::try_from(value).ok())
                .or(prompt_tokens);
            completion_tokens = usage
                .get("completion_tokens")
                .and_then(JsonValue::as_i64)
                .and_then(|value| u64::try_from(value).ok())
                .or(completion_tokens);
            total_tokens = usage
                .get("total_tokens")
                .and_then(JsonValue::as_i64)
                .and_then(|value| u64::try_from(value).ok())
                .or(total_tokens);
        }

        // Events without a first choice contribute no text or stop reason.
        let Some(choices) = json.get("choices").and_then(JsonValue::as_array) else {
            continue;
        };
        let Some(first_choice) = choices.first() else {
            continue;
        };
        if let Some(reason) = first_choice
            .get("finish_reason")
            .and_then(JsonValue::as_str)
        {
            stop_reason = Some(reason.to_string());
        }
        if let Some(text) = first_choice
            .get("delta")
            .and_then(|delta| delta.get("content"))
            .and_then(JsonValue::as_str)
        {
            if !text.is_empty() {
                // The callback runs before the chunk is recorded; its error
                // ends the whole call.
                on_chunk(text)?;
                chunks.push(text.to_string());
            }
        }
    }

    if chunks.is_empty() {
        return Err(RedDBError::Query(
            "OpenAI streaming prompt response missing text content".to_string(),
        ));
    }

    let output_text = chunks.concat();
    // Derive the total only when it was not reported and both parts are known.
    let total_tokens = total_tokens.or_else(|| match (prompt_tokens, completion_tokens) {
        (Some(prompt), Some(completion)) => Some(prompt.saturating_add(completion)),
        _ => None,
    });

    Ok(AiPromptResponse {
        provider: "openai",
        model,
        output_text,
        output_chunks: Some(chunks),
        prompt_tokens,
        completion_tokens,
        total_tokens,
        stop_reason,
    })
}
487
488pub async fn anthropic_prompt_async(
493 transport: &crate::runtime::ai::transport::AiTransport,
494 request: AnthropicPromptRequest,
495) -> RedDBResult<AiPromptResponse> {
496 if request.api_key.trim().is_empty() {
497 return Err(RedDBError::Query(
498 "Anthropic API key cannot be empty".to_string(),
499 ));
500 }
501 if request.model.trim().is_empty() {
502 return Err(RedDBError::Query(
503 "Anthropic model cannot be empty".to_string(),
504 ));
505 }
506 if request.prompt.trim().is_empty() {
507 return Err(RedDBError::Query("prompt cannot be empty".to_string()));
508 }
509
510 let url = format!("{}/messages", request.api_base.trim_end_matches('/'));
511 let payload = build_anthropic_prompt_payload(
512 &request.model,
513 &request.prompt,
514 request.temperature,
515 request.max_output_tokens,
516 );
517 let http_req =
518 crate::runtime::ai::transport::AiHttpRequest::post_json("anthropic", url, payload)
519 .model(request.model.clone())
520 .header("x-api-key", request.api_key)
521 .header("anthropic-version", request.anthropic_version);
522
523 let response = transport
524 .request(http_req)
525 .await
526 .map_err(|e| RedDBError::Query(e.to_string()))?;
527
528 parse_anthropic_prompt_response(&response.body, &request.model)
529}
530
/// Crate-visible wrapper around `build_openai_embedding_payload` that never
/// sets the optional `"dimensions"` field.
pub(crate) fn build_embedding_payload(model: &str, inputs: &[String]) -> String {
    build_openai_embedding_payload(model, inputs, None)
}
535
536pub(crate) fn parse_embedding_vectors(body: &str) -> Result<Vec<Vec<f32>>, String> {
538 parse_openai_embedding_response(body)
539 .map(|r| r.embeddings)
540 .map_err(|e| e.to_string())
541}
542
543pub(crate) fn parse_embedding_response(body: &str) -> Result<OpenAiEmbeddingResponse, String> {
544 parse_openai_embedding_response(body).map_err(|e| e.to_string())
545}
546
547fn build_openai_embedding_payload(
548 model: &str,
549 inputs: &[String],
550 dimensions: Option<usize>,
551) -> String {
552 let mut object = Map::new();
553 object.insert("model".to_string(), JsonValue::String(model.to_string()));
554 if inputs.len() == 1 {
555 object.insert("input".to_string(), JsonValue::String(inputs[0].clone()));
556 } else {
557 object.insert(
558 "input".to_string(),
559 JsonValue::Array(inputs.iter().cloned().map(JsonValue::String).collect()),
560 );
561 }
562 if let Some(dimensions) = dimensions {
563 object.insert(
564 "dimensions".to_string(),
565 JsonValue::Number(dimensions as f64),
566 );
567 }
568 object.insert(
569 "encoding_format".to_string(),
570 JsonValue::String("float".to_string()),
571 );
572 JsonValue::Object(object).to_string_compact()
573}
574
/// Extracts a human-readable error message from an OpenAI error body by
/// delegating to `provider_error_message`.
fn openai_error_message(body: &str) -> Option<String> {
    provider_error_message(body)
}
578
/// Extracts a human-readable error message from an Anthropic error body by
/// delegating to `provider_error_message`.
fn anthropic_error_message(body: &str) -> Option<String> {
    provider_error_message(body)
}
582
583fn provider_error_message(body: &str) -> Option<String> {
584 let parsed = parse_json(body).ok().map(JsonValue::from)?;
585 let error = parsed.get("error")?;
586 if let Some(message) = error.get("message").and_then(JsonValue::as_str) {
587 let trimmed = message.trim();
588 if !trimmed.is_empty() {
589 return Some(trimmed.to_string());
590 }
591 }
592 None
593}
594
595fn build_openai_prompt_payload(
596 model: &str,
597 prompt: &str,
598 temperature: Option<f32>,
599 seed: Option<u64>,
600 max_output_tokens: Option<usize>,
601 stream: bool,
602) -> String {
603 let mut object = Map::new();
604 object.insert("model".to_string(), JsonValue::String(model.to_string()));
605
606 let mut message = Map::new();
607 message.insert("role".to_string(), JsonValue::String("user".to_string()));
608 message.insert("content".to_string(), JsonValue::String(prompt.to_string()));
609 object.insert(
610 "messages".to_string(),
611 JsonValue::Array(vec![JsonValue::Object(message)]),
612 );
613
614 if let Some(temperature) = temperature {
615 object.insert(
616 "temperature".to_string(),
617 JsonValue::Number(temperature as f64),
618 );
619 }
620
621 if let Some(seed) = seed {
622 object.insert("seed".to_string(), JsonValue::Number(seed as f64));
623 }
624
625 if let Some(max_output_tokens) = max_output_tokens {
626 object.insert(
627 "max_tokens".to_string(),
628 JsonValue::Number(max_output_tokens as f64),
629 );
630 }
631
632 if stream {
633 object.insert("stream".to_string(), JsonValue::Bool(true));
634 let mut options = Map::new();
635 options.insert("include_usage".to_string(), JsonValue::Bool(true));
636 object.insert("stream_options".to_string(), JsonValue::Object(options));
637 }
638
639 JsonValue::Object(object).to_string_compact()
640}
641
642fn build_anthropic_prompt_payload(
643 model: &str,
644 prompt: &str,
645 temperature: Option<f32>,
646 max_output_tokens: Option<usize>,
647) -> String {
648 let mut object = Map::new();
649 object.insert("model".to_string(), JsonValue::String(model.to_string()));
650 object.insert(
651 "max_tokens".to_string(),
652 JsonValue::Number(max_output_tokens.unwrap_or(512) as f64),
653 );
654
655 let mut message = Map::new();
656 message.insert("role".to_string(), JsonValue::String("user".to_string()));
657 message.insert("content".to_string(), JsonValue::String(prompt.to_string()));
658 object.insert(
659 "messages".to_string(),
660 JsonValue::Array(vec![JsonValue::Object(message)]),
661 );
662
663 if let Some(temperature) = temperature {
664 object.insert(
665 "temperature".to_string(),
666 JsonValue::Number(temperature as f64),
667 );
668 }
669
670 JsonValue::Object(object).to_string_compact()
671}
672
673fn extract_text_from_parts(parts: &[JsonValue]) -> Option<String> {
674 let mut chunks = Vec::new();
675 for part in parts {
676 if let Some(text) = part.as_str() {
677 let trimmed = text.trim();
678 if !trimmed.is_empty() {
679 chunks.push(trimmed.to_string());
680 }
681 continue;
682 }
683
684 let Some(object) = part.as_object() else {
685 continue;
686 };
687 let Some(text) = object.get("text").and_then(JsonValue::as_str) else {
688 continue;
689 };
690 let trimmed = text.trim();
691 if !trimmed.is_empty() {
692 chunks.push(trimmed.to_string());
693 }
694 }
695
696 if chunks.is_empty() {
697 None
698 } else {
699 Some(chunks.join("\n\n"))
700 }
701}
702
703fn parse_openai_prompt_response(
704 body: &str,
705 requested_model: &str,
706) -> RedDBResult<AiPromptResponse> {
707 let parsed = parse_json(body)
708 .map_err(|err| RedDBError::Query(format!("invalid OpenAI prompt JSON response: {err}")))?;
709 let json = JsonValue::from(parsed);
710
711 let model = json
712 .get("model")
713 .and_then(JsonValue::as_str)
714 .unwrap_or(requested_model)
715 .to_string();
716
717 let Some(choices) = json.get("choices").and_then(JsonValue::as_array) else {
718 return Err(RedDBError::Query(
719 "OpenAI prompt response missing 'choices' array".to_string(),
720 ));
721 };
722 let Some(first_choice) = choices.first() else {
723 return Err(RedDBError::Query(
724 "OpenAI prompt response contains no choices".to_string(),
725 ));
726 };
727
728 let output_text = first_choice
729 .get("message")
730 .and_then(|message| {
731 if let Some(text) = message.get("content").and_then(JsonValue::as_str) {
732 let trimmed = text.trim();
733 if !trimmed.is_empty() {
734 return Some(trimmed.to_string());
735 }
736 }
737 message
738 .get("content")
739 .and_then(JsonValue::as_array)
740 .and_then(extract_text_from_parts)
741 })
742 .ok_or_else(|| {
743 RedDBError::Query("OpenAI prompt response missing text content".to_string())
744 })?;
745
746 let prompt_tokens = json
747 .get("usage")
748 .and_then(|usage| usage.get("prompt_tokens"))
749 .and_then(JsonValue::as_i64)
750 .and_then(|value| u64::try_from(value).ok());
751 let completion_tokens = json
752 .get("usage")
753 .and_then(|usage| usage.get("completion_tokens"))
754 .and_then(JsonValue::as_i64)
755 .and_then(|value| u64::try_from(value).ok());
756 let total_tokens = json
757 .get("usage")
758 .and_then(|usage| usage.get("total_tokens"))
759 .and_then(JsonValue::as_i64)
760 .and_then(|value| u64::try_from(value).ok())
761 .or_else(|| match (prompt_tokens, completion_tokens) {
762 (Some(prompt), Some(completion)) => Some(prompt.saturating_add(completion)),
763 _ => None,
764 });
765
766 let stop_reason = first_choice
767 .get("finish_reason")
768 .and_then(JsonValue::as_str)
769 .map(str::to_string);
770
771 Ok(AiPromptResponse {
772 provider: "openai",
773 model,
774 output_text,
775 output_chunks: None,
776 prompt_tokens,
777 completion_tokens,
778 total_tokens,
779 stop_reason,
780 })
781}
782
783fn parse_openai_streaming_prompt_response(
784 body: &str,
785 requested_model: &str,
786) -> RedDBResult<AiPromptResponse> {
787 let mut model = requested_model.to_string();
788 let mut chunks = Vec::new();
789 let mut prompt_tokens = None;
790 let mut completion_tokens = None;
791 let mut total_tokens = None;
792 let mut stop_reason = None;
793
794 for line in body.lines() {
795 let line = line.trim();
796 let Some(data) = line.strip_prefix("data:") else {
797 continue;
798 };
799 let data = data.trim();
800 if data.is_empty() {
801 continue;
802 }
803 if data == "[DONE]" {
804 break;
805 }
806
807 let parsed = parse_json(data).map_err(|err| {
808 RedDBError::Query(format!(
809 "invalid OpenAI streaming prompt JSON response: {err}"
810 ))
811 })?;
812 let json = JsonValue::from(parsed);
813 if let Some(value) = json.get("model").and_then(JsonValue::as_str) {
814 model = value.to_string();
815 }
816 if let Some(usage) = json.get("usage") {
817 prompt_tokens = usage
818 .get("prompt_tokens")
819 .and_then(JsonValue::as_i64)
820 .and_then(|value| u64::try_from(value).ok())
821 .or(prompt_tokens);
822 completion_tokens = usage
823 .get("completion_tokens")
824 .and_then(JsonValue::as_i64)
825 .and_then(|value| u64::try_from(value).ok())
826 .or(completion_tokens);
827 total_tokens = usage
828 .get("total_tokens")
829 .and_then(JsonValue::as_i64)
830 .and_then(|value| u64::try_from(value).ok())
831 .or(total_tokens);
832 }
833
834 let Some(choices) = json.get("choices").and_then(JsonValue::as_array) else {
835 continue;
836 };
837 let Some(first_choice) = choices.first() else {
838 continue;
839 };
840 if let Some(reason) = first_choice
841 .get("finish_reason")
842 .and_then(JsonValue::as_str)
843 {
844 stop_reason = Some(reason.to_string());
845 }
846 if let Some(text) = first_choice
847 .get("delta")
848 .and_then(|delta| delta.get("content"))
849 .and_then(JsonValue::as_str)
850 {
851 if !text.is_empty() {
852 chunks.push(text.to_string());
853 }
854 }
855 }
856
857 if chunks.is_empty() {
858 return Err(RedDBError::Query(
859 "OpenAI streaming prompt response missing text content".to_string(),
860 ));
861 }
862
863 let output_text = chunks.concat();
864 let total_tokens = total_tokens.or_else(|| match (prompt_tokens, completion_tokens) {
865 (Some(prompt), Some(completion)) => Some(prompt.saturating_add(completion)),
866 _ => None,
867 });
868
869 Ok(AiPromptResponse {
870 provider: "openai",
871 model,
872 output_text,
873 output_chunks: Some(chunks),
874 prompt_tokens,
875 completion_tokens,
876 total_tokens,
877 stop_reason,
878 })
879}
880
881fn parse_anthropic_prompt_response(
882 body: &str,
883 requested_model: &str,
884) -> RedDBResult<AiPromptResponse> {
885 let parsed = parse_json(body).map_err(|err| {
886 RedDBError::Query(format!("invalid Anthropic prompt JSON response: {err}"))
887 })?;
888 let json = JsonValue::from(parsed);
889
890 let model = json
891 .get("model")
892 .and_then(JsonValue::as_str)
893 .unwrap_or(requested_model)
894 .to_string();
895
896 let Some(content_parts) = json.get("content").and_then(JsonValue::as_array) else {
897 return Err(RedDBError::Query(
898 "Anthropic prompt response missing 'content' array".to_string(),
899 ));
900 };
901
902 let output_text = extract_text_from_parts(content_parts).ok_or_else(|| {
903 RedDBError::Query("Anthropic prompt response missing text content".to_string())
904 })?;
905
906 let prompt_tokens = json
907 .get("usage")
908 .and_then(|usage| usage.get("input_tokens"))
909 .and_then(JsonValue::as_i64)
910 .and_then(|value| u64::try_from(value).ok());
911 let completion_tokens = json
912 .get("usage")
913 .and_then(|usage| usage.get("output_tokens"))
914 .and_then(JsonValue::as_i64)
915 .and_then(|value| u64::try_from(value).ok());
916 let total_tokens = match (prompt_tokens, completion_tokens) {
917 (Some(prompt), Some(completion)) => Some(prompt.saturating_add(completion)),
918 _ => None,
919 };
920
921 let stop_reason = json
922 .get("stop_reason")
923 .and_then(JsonValue::as_str)
924 .map(str::to_string);
925
926 Ok(AiPromptResponse {
927 provider: "anthropic",
928 model,
929 output_text,
930 output_chunks: None,
931 prompt_tokens,
932 completion_tokens,
933 total_tokens,
934 stop_reason,
935 })
936}
937
938fn parse_openai_embedding_response(body: &str) -> RedDBResult<OpenAiEmbeddingResponse> {
939 let parsed = parse_json(body).map_err(|err| {
940 RedDBError::Query(format!("invalid OpenAI embeddings JSON response: {err}"))
941 })?;
942 let json = JsonValue::from(parsed);
943
944 let model = json
945 .get("model")
946 .and_then(JsonValue::as_str)
947 .unwrap_or(DEFAULT_OPENAI_EMBEDDING_MODEL)
948 .to_string();
949
950 let Some(data) = json.get("data").and_then(JsonValue::as_array) else {
951 return Err(RedDBError::Query(
952 "OpenAI response missing 'data' array".to_string(),
953 ));
954 };
955
956 let mut rows: Vec<(usize, Vec<f32>)> = Vec::with_capacity(data.len());
957 for (position, item) in data.iter().enumerate() {
958 let index = item
959 .get("index")
960 .and_then(JsonValue::as_i64)
961 .and_then(|value| usize::try_from(value).ok())
962 .unwrap_or(position);
963
964 let Some(embedding_values) = item.get("embedding").and_then(JsonValue::as_array) else {
965 return Err(RedDBError::Query(
966 "OpenAI response contains item without 'embedding' array".to_string(),
967 ));
968 };
969 if embedding_values.is_empty() {
970 return Err(RedDBError::Query(
971 "OpenAI response contains empty embedding vector".to_string(),
972 ));
973 }
974
975 let mut embedding = Vec::with_capacity(embedding_values.len());
976 for value in embedding_values {
977 let Some(number) = value.as_f64() else {
978 return Err(RedDBError::Query(
979 "OpenAI response contains non-numeric embedding value".to_string(),
980 ));
981 };
982 embedding.push(number as f32);
983 }
984 rows.push((index, embedding));
985 }
986 rows.sort_by_key(|(index, _)| *index);
987 let embeddings = rows.into_iter().map(|(_, embedding)| embedding).collect();
988
989 let prompt_tokens = json
990 .get("usage")
991 .and_then(|usage| usage.get("prompt_tokens"))
992 .and_then(JsonValue::as_i64)
993 .and_then(|value| u64::try_from(value).ok());
994 let total_tokens = json
995 .get("usage")
996 .and_then(|usage| usage.get("total_tokens"))
997 .and_then(JsonValue::as_i64)
998 .and_then(|value| u64::try_from(value).ok());
999
1000 Ok(OpenAiEmbeddingResponse {
1001 provider: "openai",
1002 model,
1003 embeddings,
1004 prompt_tokens,
1005 total_tokens,
1006 })
1007}
1008
1009#[cfg(test)]
1010mod tests {
1011 use super::*;
1012
1013 #[test]
1014 fn parse_openai_embedding_response_extracts_vectors() {
1015 let body = r#"{
1016 "object":"list",
1017 "data":[
1018 {"object":"embedding","index":1,"embedding":[0.3,0.4]},
1019 {"object":"embedding","index":0,"embedding":[0.1,0.2]}
1020 ],
1021 "model":"text-embedding-3-small",
1022 "usage":{"prompt_tokens":12,"total_tokens":12}
1023 }"#;
1024
1025 let result = parse_openai_embedding_response(body).expect("response should parse");
1026 assert_eq!(result.provider, "openai");
1027 assert_eq!(result.model, "text-embedding-3-small");
1028 assert_eq!(result.embeddings.len(), 2);
1029 assert_eq!(result.embeddings[0], vec![0.1, 0.2]);
1030 assert_eq!(result.embeddings[1], vec![0.3, 0.4]);
1031 assert_eq!(result.prompt_tokens, Some(12));
1032 assert_eq!(result.total_tokens, Some(12));
1033 }
1034
1035 #[test]
1036 fn openai_error_message_extracts_nested_message() {
1037 let body = r#"{"error":{"message":"bad api key","type":"invalid_request_error"}}"#;
1038 assert_eq!(openai_error_message(body).as_deref(), Some("bad api key"));
1039 }
1040
1041 #[test]
1042 fn parse_openai_prompt_response_extracts_text_and_usage() {
1043 let body = r#"{
1044 "id":"chatcmpl_1",
1045 "object":"chat.completion",
1046 "model":"gpt-4.1-mini",
1047 "choices":[
1048 {
1049 "index":0,
1050 "finish_reason":"stop",
1051 "message":{"role":"assistant","content":"Resumo pronto."}
1052 }
1053 ],
1054 "usage":{"prompt_tokens":10,"completion_tokens":4,"total_tokens":14}
1055 }"#;
1056
1057 let parsed =
1058 parse_openai_prompt_response(body, DEFAULT_OPENAI_PROMPT_MODEL).expect("parse");
1059 assert_eq!(parsed.provider, "openai");
1060 assert_eq!(parsed.model, "gpt-4.1-mini");
1061 assert_eq!(parsed.output_text, "Resumo pronto.");
1062 assert_eq!(parsed.prompt_tokens, Some(10));
1063 assert_eq!(parsed.completion_tokens, Some(4));
1064 assert_eq!(parsed.total_tokens, Some(14));
1065 assert_eq!(parsed.stop_reason.as_deref(), Some("stop"));
1066 }
1067
1068 #[test]
1069 fn parse_anthropic_prompt_response_extracts_text_and_usage() {
1070 let body = r#"{
1071 "id":"msg_1",
1072 "model":"claude-3-5-haiku-latest",
1073 "type":"message",
1074 "content":[{"type":"text","text":"Action complete."}],
1075 "usage":{"input_tokens":11,"output_tokens":5},
1076 "stop_reason":"end_turn"
1077 }"#;
1078
1079 let parsed =
1080 parse_anthropic_prompt_response(body, DEFAULT_ANTHROPIC_PROMPT_MODEL).expect("parse");
1081 assert_eq!(parsed.provider, "anthropic");
1082 assert_eq!(parsed.model, "claude-3-5-haiku-latest");
1083 assert_eq!(parsed.output_text, "Action complete.");
1084 assert_eq!(parsed.prompt_tokens, Some(11));
1085 assert_eq!(parsed.completion_tokens, Some(5));
1086 assert_eq!(parsed.total_tokens, Some(16));
1087 assert_eq!(parsed.stop_reason.as_deref(), Some("end_turn"));
1088 }
1089
1090 #[test]
1091 fn resolve_api_key_prefers_vault_secret_over_legacy_config() {
1092 let provider = AiProvider::OpenAi;
1093 let alias = "vault_unit_alias";
1094 let secret_path = ai_api_secret_path(&provider, alias);
1095 let legacy_key = ai_api_legacy_config_key(&provider, alias);
1096
1097 let resolved = resolve_api_key(&provider, Some(alias), |key| {
1098 if key == secret_path {
1099 Ok(Some("vault-key".to_string()))
1100 } else if key == legacy_key {
1101 Ok(Some("legacy-key".to_string()))
1102 } else {
1103 Ok(None)
1104 }
1105 })
1106 .expect("resolve");
1107
1108 assert_eq!(resolved, "vault-key");
1109 }
1110
1111 #[test]
1112 fn resolve_api_key_uses_default_vault_secret_path() {
1113 let provider = AiProvider::OpenAi;
1114 let secret_path = ai_api_secret_path(&provider, "default");
1115
1116 let resolved = resolve_api_key(&provider, None, |key| {
1117 if key == secret_path {
1118 Ok(Some("default-vault-key".to_string()))
1119 } else {
1120 Ok(None)
1121 }
1122 })
1123 .expect("resolve");
1124
1125 assert_eq!(resolved, "default-vault-key");
1126 }
1127
1128 #[test]
1129 fn openai_prompt_payload_includes_temperature_and_seed_when_present() {
1130 let payload = build_openai_prompt_payload(
1131 "gpt-4.1-mini",
1132 "hello",
1133 Some(0.0),
1134 Some(42),
1135 Some(128),
1136 false,
1137 );
1138 let parsed = JsonValue::from(parse_json(&payload).expect("valid json"));
1139
1140 assert_eq!(
1141 parsed.get("temperature").and_then(JsonValue::as_f64),
1142 Some(0.0)
1143 );
1144 assert_eq!(parsed.get("seed").and_then(JsonValue::as_u64), Some(42));
1145 assert_eq!(
1146 parsed.get("max_tokens").and_then(JsonValue::as_u64),
1147 Some(128)
1148 );
1149 }
1150
1151 #[test]
1152 fn openai_prompt_payload_omits_seed_when_none() {
1153 let payload =
1154 build_openai_prompt_payload("gpt-4.1-mini", "hello", Some(0.0), None, None, false);
1155 let parsed = JsonValue::from(parse_json(&payload).expect("valid json"));
1156
1157 assert!(parsed.get("seed").is_none());
1158 assert!(parsed.get("stream").is_none());
1159 assert_eq!(
1160 parsed.get("temperature").and_then(JsonValue::as_f64),
1161 Some(0.0)
1162 );
1163 }
1164
1165 #[test]
1166 fn openai_prompt_payload_enables_stream_options() {
1167 let payload =
1168 build_openai_prompt_payload("gpt-4.1-mini", "hello", Some(0.0), None, None, true);
1169 let parsed = JsonValue::from(parse_json(&payload).expect("valid json"));
1170
1171 assert_eq!(
1172 parsed.get("stream").and_then(JsonValue::as_bool),
1173 Some(true)
1174 );
1175 assert_eq!(
1176 parsed
1177 .get("stream_options")
1178 .and_then(|value| value.get("include_usage"))
1179 .and_then(JsonValue::as_bool),
1180 Some(true)
1181 );
1182 }
1183
1184 #[test]
1185 fn openai_streaming_prompt_response_collects_delta_chunks() {
1186 let body = concat!(
1187 "data: {\"model\":\"gpt-test\",\"choices\":[{\"delta\":{\"content\":\"login \"},\"finish_reason\":null}]}\n\n",
1188 "data: {\"model\":\"gpt-test\",\"choices\":[{\"delta\":{\"content\":\"failed\"},\"finish_reason\":null}]}\n\n",
1189 "data: {\"model\":\"gpt-test\",\"choices\":[{\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":12,\"completion_tokens\":2,\"total_tokens\":14}}\n\n",
1190 "data: [DONE]\n\n",
1191 );
1192 let parsed = parse_openai_streaming_prompt_response(body, "fallback").unwrap();
1193
1194 assert_eq!(parsed.model, "gpt-test");
1195 assert_eq!(parsed.output_text, "login failed");
1196 assert_eq!(
1197 parsed.output_chunks.as_deref(),
1198 Some(["login ".to_string(), "failed".to_string()].as_slice())
1199 );
1200 assert_eq!(parsed.prompt_tokens, Some(12));
1201 assert_eq!(parsed.completion_tokens, Some(2));
1202 assert_eq!(parsed.total_tokens, Some(14));
1203 assert_eq!(parsed.stop_reason.as_deref(), Some("stop"));
1204 }
1205
1206 #[tokio::test]
1207 async fn openai_prompt_async_rejects_empty_model() {
1208 let transport = crate::runtime::ai::transport::AiTransport::new(Default::default());
1209 let request = OpenAiPromptRequest {
1210 api_key: "key".to_string(),
1211 model: " ".to_string(),
1212 prompt: "hello".to_string(),
1213 temperature: None,
1214 seed: None,
1215 max_output_tokens: None,
1216 api_base: "https://api.openai.com/v1".to_string(),
1217 stream: false,
1218 };
1219 let err = openai_prompt_async(&transport, request).await.unwrap_err();
1220 assert!(err.to_string().contains("model cannot be empty"));
1221 }
1222
1223 #[tokio::test]
1224 async fn openai_prompt_async_rejects_empty_prompt() {
1225 let transport = crate::runtime::ai::transport::AiTransport::new(Default::default());
1226 let request = OpenAiPromptRequest {
1227 api_key: "key".to_string(),
1228 model: "gpt-4.1-mini".to_string(),
1229 prompt: "".to_string(),
1230 temperature: None,
1231 seed: None,
1232 max_output_tokens: None,
1233 api_base: "https://api.openai.com/v1".to_string(),
1234 stream: false,
1235 };
1236 let err = openai_prompt_async(&transport, request).await.unwrap_err();
1237 assert!(err.to_string().contains("prompt cannot be empty"));
1238 }
1239
1240 use std::io::{Read as _, Write as _};
1250 use std::net::TcpListener;
1251 use std::sync::{Arc, Mutex};
1252 use std::thread;
1253
    /// Snapshot of one HTTP request as read off the socket by
    /// `parse_http_request`; the mock-server tests assert against it.
    struct CapturedRequest {
        /// First whitespace-separated token of the request line (e.g. `POST`).
        method: String,
        /// Second whitespace-separated token of the request line.
        path: String,
        /// Header `(name, value)` pairs in arrival order, both sides trimmed.
        headers: Vec<(String, String)>,
        /// Bytes following the header terminator, decoded lossily as UTF-8.
        body: String,
    }
1260
1261 fn parse_http_request(stream: &mut std::net::TcpStream) -> CapturedRequest {
1262 let mut buf = [0u8; 8192];
1263 let mut data = Vec::new();
1264 loop {
1265 let read = stream.read(&mut buf).unwrap_or(0);
1266 if read == 0 {
1267 break;
1268 }
1269 data.extend_from_slice(&buf[..read]);
1270 if let Some(idx) = data.windows(4).position(|w| w == b"\r\n\r\n") {
1271 let header_len = idx + 4;
1272 let header_str = String::from_utf8_lossy(&data[..idx]).to_string();
1273 let mut lines = header_str.split("\r\n");
1274 let request_line = lines.next().unwrap_or("");
1275 let mut parts = request_line.split_whitespace();
1276 let method = parts.next().unwrap_or("").to_string();
1277 let path = parts.next().unwrap_or("").to_string();
1278 let mut headers = Vec::new();
1279 let mut content_length: usize = 0;
1280 for line in lines {
1281 if let Some((k, v)) = line.split_once(':') {
1282 let k = k.trim().to_string();
1283 let v = v.trim().to_string();
1284 if k.eq_ignore_ascii_case("content-length") {
1285 content_length = v.parse().unwrap_or(0);
1286 }
1287 headers.push((k, v));
1288 }
1289 }
1290 while data.len() < header_len + content_length {
1291 let read = stream.read(&mut buf).unwrap_or(0);
1292 if read == 0 {
1293 break;
1294 }
1295 data.extend_from_slice(&buf[..read]);
1296 }
1297 let body = String::from_utf8_lossy(&data[header_len..header_len + content_length])
1298 .to_string();
1299 return CapturedRequest {
1300 method,
1301 path,
1302 headers,
1303 body,
1304 };
1305 }
1306 }
1307 CapturedRequest {
1308 method: String::new(),
1309 path: String::new(),
1310 headers: Vec::new(),
1311 body: String::new(),
1312 }
1313 }
1314
1315 fn spawn_mock(
1318 status: u16,
1319 response_body: &'static str,
1320 ) -> (String, Arc<Mutex<Option<CapturedRequest>>>) {
1321 let listener = TcpListener::bind("127.0.0.1:0").expect("bind");
1322 let addr = listener.local_addr().expect("addr");
1323 let captured: Arc<Mutex<Option<CapturedRequest>>> = Arc::new(Mutex::new(None));
1324 let captured_clone = Arc::clone(&captured);
1325 thread::spawn(move || {
1326 if let Ok((mut stream, _)) = listener.accept() {
1327 let req = parse_http_request(&mut stream);
1328 *captured_clone.lock().unwrap() = Some(req);
1329 let status_line = match status {
1330 200 => "200 OK",
1331 400 => "400 Bad Request",
1332 401 => "401 Unauthorized",
1333 500 => "500 Internal Server Error",
1334 _ => "200 OK",
1335 };
1336 let resp = format!(
1337 "HTTP/1.1 {status_line}\r\n\
1338 Content-Type: application/json\r\n\
1339 Content-Length: {}\r\n\
1340 Connection: close\r\n\r\n{}",
1341 response_body.len(),
1342 response_body
1343 );
1344 let _ = stream.write_all(resp.as_bytes());
1345 }
1346 });
1347 (format!("http://{}", addr), captured)
1348 }
1349
1350 #[test]
1351 fn openai_compat_chat_roundtrip_honors_arbitrary_api_base_and_headers() {
1352 let body = r#"{
1353 "id":"chatcmpl_x",
1354 "model":"custom-model",
1355 "choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","content":"hi"}}],
1356 "usage":{"prompt_tokens":7,"completion_tokens":2,"total_tokens":9}
1357 }"#;
1358 let (base, captured) = spawn_mock(200, body);
1359
1360 let req = OpenAiCompatChatRequest {
1361 api_base: base.clone(),
1362 api_key: "sk-test".to_string(),
1363 model: "custom-model".to_string(),
1364 prompt: "say hi".to_string(),
1365 temperature: None,
1366 seed: None,
1367 max_output_tokens: None,
1368 extra_headers: vec![("X-Custom-Tag".to_string(), "abc".to_string())],
1369 };
1370 let resp = openai_compat_chat(req).expect("ok");
1371
1372 assert_eq!(resp.output_text, "hi");
1373 assert_eq!(resp.model, "custom-model");
1374 assert_eq!(resp.usage.input_tokens, Some(7));
1375 assert_eq!(resp.usage.output_tokens, Some(2));
1376 assert_eq!(resp.usage.total_tokens, Some(9));
1377 assert_eq!(resp.stop_reason.as_deref(), Some("stop"));
1378
1379 let cap = captured.lock().unwrap().take().expect("captured");
1380 assert_eq!(cap.method, "POST");
1381 assert_eq!(cap.path, "/chat/completions");
1382 let has_auth = cap
1383 .headers
1384 .iter()
1385 .any(|(k, v)| k.eq_ignore_ascii_case("authorization") && v == "Bearer sk-test");
1386 assert!(has_auth, "Authorization header missing");
1387 let has_custom = cap
1388 .headers
1389 .iter()
1390 .any(|(k, v)| k.eq_ignore_ascii_case("x-custom-tag") && v == "abc");
1391 assert!(has_custom, "extra header missing");
1392 assert!(cap.body.contains("\"model\":\"custom-model\""));
1393 }
1394
1395 #[test]
1396 fn openai_compat_embeddings_roundtrip_with_dimensions() {
1397 let body = r#"{
1398 "object":"list",
1399 "model":"embed-model",
1400 "data":[{"object":"embedding","index":0,"embedding":[0.5,0.25]}],
1401 "usage":{"prompt_tokens":4,"total_tokens":4}
1402 }"#;
1403 let (base, captured) = spawn_mock(200, body);
1404
1405 let req = OpenAiCompatEmbeddingsRequest {
1406 api_base: base,
1407 api_key: "sk-emb".to_string(),
1408 model: "embed-model".to_string(),
1409 inputs: vec!["hello".to_string()],
1410 dimensions: Some(2),
1411 extra_headers: vec![],
1412 };
1413 let resp = openai_compat_embeddings(req).expect("ok");
1414
1415 assert_eq!(resp.embeddings.len(), 1);
1416 assert_eq!(resp.embeddings[0], vec![0.5_f32, 0.25_f32]);
1417 assert_eq!(resp.usage.total_tokens, Some(4));
1418 assert_eq!(resp.usage.input_tokens, Some(4));
1419
1420 let cap = captured.lock().unwrap().take().expect("captured");
1421 assert_eq!(cap.path, "/embeddings");
1422 assert!(cap.body.contains("\"dimensions\":2"));
1423 }
1424
1425 #[test]
1426 fn openai_compat_chat_non_2xx_returns_structured_error() {
1427 let body = r#"{"error":{"message":"bad api key","type":"invalid_request_error"}}"#;
1428 let (base, _captured) = spawn_mock(401, body);
1429
1430 let req = OpenAiCompatChatRequest {
1431 api_base: base,
1432 api_key: "bad".to_string(),
1433 model: "m".to_string(),
1434 prompt: "hi".to_string(),
1435 temperature: None,
1436 seed: None,
1437 max_output_tokens: None,
1438 extra_headers: vec![],
1439 };
1440 let err = openai_compat_chat(req).unwrap_err().to_string();
1441 assert!(err.contains("status 401"), "got: {err}");
1442 assert!(err.contains("bad api key"), "got: {err}");
1443 }
1444
1445 #[test]
1446 fn openai_compat_chat_rejects_empty_model_and_prompt() {
1447 let req = OpenAiCompatChatRequest {
1448 api_base: "http://localhost:1".to_string(),
1449 api_key: "k".to_string(),
1450 model: " ".to_string(),
1451 prompt: "hi".to_string(),
1452 temperature: None,
1453 seed: None,
1454 max_output_tokens: None,
1455 extra_headers: vec![],
1456 };
1457 let err = openai_compat_chat(req).unwrap_err().to_string();
1458 assert!(err.contains("model cannot be empty"), "got: {err}");
1459
1460 let req = OpenAiCompatChatRequest {
1461 api_base: "http://localhost:1".to_string(),
1462 api_key: "k".to_string(),
1463 model: "m".to_string(),
1464 prompt: " ".to_string(),
1465 temperature: None,
1466 seed: None,
1467 max_output_tokens: None,
1468 extra_headers: vec![],
1469 };
1470 let err = openai_compat_chat(req).unwrap_err().to_string();
1471 assert!(err.contains("prompt cannot be empty"), "got: {err}");
1472 }
1473
1474 #[test]
1475 fn parse_provider_mode_recognizes_all_three_tokens() {
1476 assert_eq!(
1477 parse_provider_mode("openai-compat"),
1478 Some(AiProviderMode::OpenAiCompat)
1479 );
1480 assert_eq!(
1481 parse_provider_mode("OPENAI_NATIVE"),
1482 Some(AiProviderMode::OpenAiNative)
1483 );
1484 assert_eq!(
1485 parse_provider_mode("anthropic-native"),
1486 Some(AiProviderMode::AnthropicNative)
1487 );
1488 assert_eq!(parse_provider_mode("groq"), None);
1489 }
1490
1491 #[test]
1492 fn resolve_provider_mode_reads_kv_key() {
1493 let kv = |key: &str| -> crate::RedDBResult<Option<String>> {
1494 if key == "red.config.ai.provider" {
1495 Ok(Some("anthropic-native".to_string()))
1496 } else {
1497 Ok(None)
1498 }
1499 };
1500 assert_eq!(
1501 resolve_provider_mode(&kv),
1502 Some(AiProviderMode::AnthropicNative)
1503 );
1504 }
1505
1506 #[test]
1507 fn resolve_default_provider_honors_mode_key() {
1508 let kv = |key: &str| -> crate::RedDBResult<Option<String>> {
1509 match key {
1510 "red.config.ai.provider" => Ok(Some("anthropic-native".to_string())),
1511 "red.config.ai.default.provider" => Ok(Some("groq".to_string())),
1512 _ => Ok(None),
1513 }
1514 };
1515 assert_eq!(resolve_default_provider(&kv), AiProvider::Anthropic);
1516 }
1517
1518 #[tokio::test]
1519 async fn anthropic_prompt_async_rejects_empty_api_key() {
1520 let transport = crate::runtime::ai::transport::AiTransport::new(Default::default());
1521 let request = AnthropicPromptRequest {
1522 api_key: " ".to_string(),
1523 model: "claude-3-5-haiku-latest".to_string(),
1524 prompt: "hello".to_string(),
1525 temperature: None,
1526 max_output_tokens: None,
1527 api_base: "https://api.anthropic.com/v1".to_string(),
1528 anthropic_version: DEFAULT_ANTHROPIC_VERSION.to_string(),
1529 };
1530 let err = anthropic_prompt_async(&transport, request)
1531 .await
1532 .unwrap_err();
1533 assert!(err.to_string().contains("API key cannot be empty"));
1534 }
1535}
1536
/// AI backend selector.
///
/// Each variant maps to a lowercase token, default prompt/embedding models
/// and a default API base URL through the methods on this type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AiProvider {
    OpenAi,
    Anthropic,
    Groq,
    OpenRouter,
    Together,
    Venice,
    Ollama,
    DeepSeek,
    HuggingFace,
    Local,
    /// Provider identified by a caller-supplied string. `parse_provider`
    /// builds this from a name starting with `http://` or `https://`; the
    /// string is returned by both `token()` and `default_api_base()`.
    Custom(String),
}
1556
1557impl AiProvider {
1558 pub fn token(&self) -> &str {
1559 match self {
1560 Self::OpenAi => "openai",
1561 Self::Anthropic => "anthropic",
1562 Self::Groq => "groq",
1563 Self::OpenRouter => "openrouter",
1564 Self::Together => "together",
1565 Self::Venice => "venice",
1566 Self::Ollama => "ollama",
1567 Self::DeepSeek => "deepseek",
1568 Self::HuggingFace => "huggingface",
1569 Self::Local => "local",
1570 Self::Custom(name) => name.as_str(),
1571 }
1572 }
1573
1574 pub fn default_prompt_model(&self) -> &str {
1575 match self {
1576 Self::OpenAi => DEFAULT_OPENAI_PROMPT_MODEL,
1577 Self::Anthropic => DEFAULT_ANTHROPIC_PROMPT_MODEL,
1578 Self::Groq => "llama-3.3-70b-versatile",
1579 Self::OpenRouter => "auto",
1580 Self::Together => "meta-llama/Meta-Llama-3-8B-Instruct",
1581 Self::Venice => "llama-3.3-70b",
1582 Self::Ollama => "llama3",
1583 Self::DeepSeek => "deepseek-chat",
1584 Self::HuggingFace => "mistralai/Mistral-7B-Instruct-v0.3",
1585 Self::Local => "sentence-transformers/all-MiniLM-L6-v2",
1586 Self::Custom(_) => DEFAULT_OPENAI_PROMPT_MODEL,
1587 }
1588 }
1589
1590 pub fn prompt_model_env_name(&self) -> String {
1591 format!("REDDB_{}_PROMPT_MODEL", self.token().to_ascii_uppercase())
1592 }
1593
1594 pub fn default_embedding_model(&self) -> &str {
1595 match self {
1596 Self::Ollama => "nomic-embed-text",
1597 Self::HuggingFace | Self::Local => "sentence-transformers/all-MiniLM-L6-v2",
1598 _ => DEFAULT_OPENAI_EMBEDDING_MODEL,
1599 }
1600 }
1601
1602 pub fn default_api_base(&self) -> &str {
1603 match self {
1604 Self::OpenAi => DEFAULT_OPENAI_API_BASE,
1605 Self::Anthropic => DEFAULT_ANTHROPIC_API_BASE,
1606 Self::Groq => "https://api.groq.com/openai/v1",
1607 Self::OpenRouter => "https://openrouter.ai/api/v1",
1608 Self::Together => "https://api.together.xyz/v1",
1609 Self::Venice => "https://api.venice.ai/api/v1",
1610 Self::Ollama => "http://localhost:11434/v1",
1611 Self::DeepSeek => "https://api.deepseek.com/v1",
1612 Self::HuggingFace => "https://api-inference.huggingface.co",
1613 Self::Local => "local",
1614 Self::Custom(base) => base.as_str(),
1615 }
1616 }
1617
1618 pub fn api_base_env_name(&self) -> String {
1619 format!("REDDB_{}_API_BASE", self.token().to_ascii_uppercase())
1620 }
1621
1622 pub fn default_key_env_name(&self) -> String {
1623 format!("REDDB_{}_API_KEY", self.token().to_ascii_uppercase())
1624 }
1625
1626 pub fn alias_key_env_name(&self, alias: &str) -> String {
1627 let normalized = normalize_alias_token(alias);
1628 format!(
1629 "REDDB_{}_API_KEY_{normalized}",
1630 self.token().to_ascii_uppercase()
1631 )
1632 }
1633
1634 pub fn resolve_api_base(&self) -> String {
1635 if let Ok(value) = std::env::var(self.api_base_env_name()) {
1636 let value = value.trim().to_string();
1637 if !value.is_empty() {
1638 return value;
1639 }
1640 }
1641 self.default_api_base().to_string()
1642 }
1643
1644 pub fn resolve_api_base_with_kv<F>(&self, alias: &str, kv_getter: &F) -> String
1646 where
1647 F: Fn(&str) -> crate::RedDBResult<Option<String>>,
1648 {
1649 if let Ok(value) = std::env::var(self.api_base_env_name()) {
1651 let value = value.trim().to_string();
1652 if !value.is_empty() {
1653 return value;
1654 }
1655 }
1656 let kv_key = format!("red.config.ai.{}.{alias}.base_url", self.token());
1658 if let Ok(Some(value)) = kv_getter(&kv_key) {
1659 let value = value.trim().to_string();
1660 if !value.is_empty() {
1661 return value;
1662 }
1663 }
1664 self.default_api_base().to_string()
1665 }
1666
1667 pub fn is_openai_compatible(&self) -> bool {
1669 matches!(
1670 self,
1671 Self::OpenAi
1672 | Self::Groq
1673 | Self::OpenRouter
1674 | Self::Together
1675 | Self::Venice
1676 | Self::Ollama
1677 | Self::DeepSeek
1678 | Self::Custom(_)
1679 )
1680 }
1681
1682 pub fn requires_api_key(&self) -> bool {
1684 !matches!(self, Self::Ollama | Self::Local)
1685 }
1686}
1687
1688pub fn parse_provider(name: &str) -> crate::RedDBResult<AiProvider> {
1690 match name.trim().to_ascii_lowercase().as_str() {
1691 "openai" => Ok(AiProvider::OpenAi),
1692 "anthropic" => Ok(AiProvider::Anthropic),
1693 "groq" => Ok(AiProvider::Groq),
1694 "openrouter" | "open_router" => Ok(AiProvider::OpenRouter),
1695 "together" => Ok(AiProvider::Together),
1696 "venice" => Ok(AiProvider::Venice),
1697 "ollama" => Ok(AiProvider::Ollama),
1698 "deepseek" | "deep_seek" => Ok(AiProvider::DeepSeek),
1699 "huggingface" | "hf" => Ok(AiProvider::HuggingFace),
1700 "local" => Ok(AiProvider::Local),
1701 other => {
1702 if other.starts_with("http://") || other.starts_with("https://") {
1704 Ok(AiProvider::Custom(other.to_string()))
1705 } else {
1706 Err(crate::RedDBError::Query(format!(
1707 "unsupported AI provider '{other}'; expected: openai, anthropic, groq, \
1708 openrouter, together, venice, ollama, deepseek, huggingface, local"
1709 )))
1710 }
1711 }
1712 }
1713}
1714
1715pub fn resolve_default_provider<F>(kv_getter: &F) -> AiProvider
1720where
1721 F: Fn(&str) -> crate::RedDBResult<Option<String>>,
1722{
1723 if let Some(mode) = resolve_provider_mode(kv_getter) {
1726 return provider_mode_to_provider(mode);
1727 }
1728 if let Ok(value) = std::env::var("REDDB_AI_PROVIDER") {
1730 let value = value.trim().to_string();
1731 if !value.is_empty() {
1732 if let Ok(provider) = parse_provider(&value) {
1733 return provider;
1734 }
1735 }
1736 }
1737 if let Ok(Some(value)) = kv_getter("red.config.ai.default.provider") {
1739 let value = value.trim().to_string();
1740 if !value.is_empty() {
1741 if let Ok(provider) = parse_provider(&value) {
1742 return provider;
1743 }
1744 }
1745 }
1746 AiProvider::OpenAi
1747}
1748
1749pub fn resolve_default_model<F>(provider: &AiProvider, kv_getter: &F) -> String
1754where
1755 F: Fn(&str) -> crate::RedDBResult<Option<String>>,
1756{
1757 if let Ok(value) = std::env::var("REDDB_AI_MODEL") {
1759 let value = value.trim().to_string();
1760 if !value.is_empty() {
1761 return value;
1762 }
1763 }
1764 if let Ok(value) = std::env::var(provider.prompt_model_env_name()) {
1766 let value = value.trim().to_string();
1767 if !value.is_empty() {
1768 return value;
1769 }
1770 }
1771 if let Ok(Some(value)) = kv_getter("red.config.ai.default.model") {
1773 let value = value.trim().to_string();
1774 if !value.is_empty() {
1775 return value;
1776 }
1777 }
1778 provider.default_prompt_model().to_string()
1779}
1780
1781pub fn resolve_defaults_from_runtime(
1783 runtime: &crate::runtime::RedDBRuntime,
1784) -> (AiProvider, String) {
1785 use crate::application::ports::RuntimeEntityPort;
1786 let kv_getter = |key: &str| -> crate::RedDBResult<Option<String>> {
1787 match runtime.get_kv("red_config", key)? {
1788 Some((crate::storage::schema::Value::Text(s), _)) => Ok(Some(s.to_string())),
1789 _ => Ok(None),
1790 }
1791 };
1792 let provider = resolve_default_provider(&kv_getter);
1793 let model = resolve_default_model(&provider, &kv_getter);
1794 (provider, model)
1795}
1796
1797pub fn resolve_defaults_from_runtime_port<
1799 P: crate::application::ports::RuntimeEntityPort + ?Sized,
1800>(
1801 runtime: &P,
1802) -> (AiProvider, String) {
1803 let kv_getter = |key: &str| -> crate::RedDBResult<Option<String>> {
1804 match runtime.get_kv("red_config", key)? {
1805 Some((crate::storage::schema::Value::Text(s), _)) => Ok(Some(s.to_string())),
1806 _ => Ok(None),
1807 }
1808 };
1809 let provider = resolve_default_provider(&kv_getter);
1810 let model = resolve_default_model(&provider, &kv_getter);
1811 (provider, model)
1812}
1813
1814pub fn resolve_api_key<F>(
1823 provider: &AiProvider,
1824 credential_alias: Option<&str>,
1825 kv_getter: F,
1826) -> crate::RedDBResult<String>
1827where
1828 F: Fn(&str) -> crate::RedDBResult<Option<String>>,
1829{
1830 if !provider.requires_api_key() {
1832 if let Ok(value) = std::env::var(provider.default_key_env_name()) {
1834 let value = value.trim().to_string();
1835 if !value.is_empty() {
1836 return Ok(value);
1837 }
1838 }
1839 return Ok(String::new());
1840 }
1841
1842 if let Some(alias) = credential_alias.map(str::trim).filter(|a| !a.is_empty()) {
1843 if let Some(key) = resolve_key_from_env_alias(provider, alias) {
1845 return Ok(key);
1846 }
1847 if let Some(key) = kv_getter(&ai_api_secret_path(provider, alias))? {
1848 if !key.trim().is_empty() {
1849 return Ok(key);
1850 }
1851 }
1852 if let Some(secret_ref) = kv_getter(&ai_api_secret_ref_config_key(provider, alias))? {
1853 if let Some(key) = kv_getter(secret_ref.trim())? {
1854 if !key.trim().is_empty() {
1855 return Ok(key);
1856 }
1857 }
1858 }
1859 let legacy_key = ai_api_legacy_config_key(provider, alias);
1860 if let Some(key) = kv_getter(&legacy_key)? {
1861 if !key.trim().is_empty() {
1862 return Ok(key);
1863 }
1864 }
1865 return Err(crate::RedDBError::Query(format!(
1866 "credential '{alias}' not found for {}. Set env {} or store it in the vault",
1867 provider.token(),
1868 provider.alias_key_env_name(alias)
1869 )));
1870 }
1871
1872 if let Ok(value) = std::env::var(provider.default_key_env_name()) {
1874 let value = value.trim().to_string();
1875 if !value.is_empty() {
1876 return Ok(value);
1877 }
1878 }
1879
1880 if let Some(key) = kv_getter(&ai_api_secret_path(provider, "default"))? {
1881 if !key.trim().is_empty() {
1882 return Ok(key);
1883 }
1884 }
1885 if let Some(secret_ref) = kv_getter(&ai_api_secret_ref_config_key(provider, "default"))? {
1886 if let Some(key) = kv_getter(secret_ref.trim())? {
1887 if !key.trim().is_empty() {
1888 return Ok(key);
1889 }
1890 }
1891 }
1892 if let Some(key) = kv_getter(&ai_api_legacy_config_key(provider, "default"))? {
1893 if !key.trim().is_empty() {
1894 return Ok(key);
1895 }
1896 }
1897
1898 let legacy_short_key = format!("{}/default", provider.token());
1899 if let Some(key) = kv_getter(&legacy_short_key)? {
1900 if !key.trim().is_empty() {
1901 return Ok(key);
1902 }
1903 }
1904
1905 Err(crate::RedDBError::Query(format!(
1906 "missing {} API key. Set {} or provide credential alias",
1907 provider.token(),
1908 provider.default_key_env_name()
1909 )))
1910}
1911
1912pub fn ai_api_secret_path(provider: &AiProvider, alias: &str) -> String {
1913 format!(
1914 "red.secret.ai.{}.{}.api_key",
1915 provider.token(),
1916 normalize_credential_alias_path(alias)
1917 )
1918}
1919
1920pub fn ai_api_secret_ref_config_key(provider: &AiProvider, alias: &str) -> String {
1921 format!(
1922 "red.config.ai.{}.{}.secret_ref",
1923 provider.token(),
1924 normalize_credential_alias_path(alias)
1925 )
1926}
1927
1928pub fn ai_api_legacy_config_key(provider: &AiProvider, alias: &str) -> String {
1929 format!(
1930 "red.config.ai.{}.{}.key",
1931 provider.token(),
1932 normalize_credential_alias_path(alias)
1933 )
1934}
1935
/// Turns a credential alias into the segment used inside config and secret
/// keys: trimmed and ASCII-lowercased, or `"default"` when blank.
fn normalize_credential_alias_path(alias: &str) -> String {
    match alias.trim() {
        "" => String::from("default"),
        trimmed => trimmed.to_ascii_lowercase(),
    }
}
1944
1945fn resolve_key_from_env_alias(provider: &AiProvider, alias: &str) -> Option<String> {
1946 let env_name = provider.alias_key_env_name(alias);
1947 std::env::var(env_name)
1948 .ok()
1949 .map(|v| v.trim().to_string())
1950 .filter(|v| !v.is_empty())
1951}
1952
/// Converts an alias into an environment-variable-safe token.
///
/// ASCII letters and digits are kept (uppercased). Every other character
/// acts as a separator: runs of separators collapse into a single `_`, and
/// separators at the start or end are dropped.
fn normalize_alias_token(alias: &str) -> String {
    let mut token = String::with_capacity(alias.len());
    for ch in alias.chars() {
        if ch.is_ascii_alphanumeric() {
            token.push(ch.to_ascii_uppercase());
        } else if !token.is_empty() && !token.ends_with('_') {
            // First separator after real content; later ones in the same run
            // and any leading ones are skipped.
            token.push('_');
        }
    }
    // At most one separator can be pending at the end.
    if token.ends_with('_') {
        token.pop();
    }
    token
}
1967
1968pub fn resolve_api_key_from_runtime(
1977 provider: &AiProvider,
1978 credential_alias: Option<&str>,
1979 runtime: &crate::runtime::RedDBRuntime,
1980) -> crate::RedDBResult<String> {
1981 use crate::application::ports::RuntimeEntityPort;
1982 let alias_for_audit = credential_alias.unwrap_or("default").to_string();
1983 let provider_token = provider.token().to_string();
1984 let audited_paths: std::cell::RefCell<Vec<(String, bool)>> =
1985 std::cell::RefCell::new(Vec::new());
1986 let result = resolve_api_key(provider, credential_alias, |kv_key| {
1987 if kv_key.starts_with("red.secret.") {
1988 let value = runtime.vault_kv_get(kv_key);
1989 audited_paths
1990 .borrow_mut()
1991 .push((kv_key.to_string(), value.is_some()));
1992 return Ok(value);
1993 }
1994 match runtime.get_kv("red_config", kv_key)? {
1995 Some((crate::storage::schema::Value::Text(secret), _)) => {
1996 audited_paths.borrow_mut().push((kv_key.to_string(), true));
1997 Ok(Some(secret.to_string()))
1998 }
1999 Some(_) => {
2000 audited_paths.borrow_mut().push((kv_key.to_string(), false));
2001 Ok(None)
2002 }
2003 None => {
2004 audited_paths.borrow_mut().push((kv_key.to_string(), false));
2005 Ok(None)
2006 }
2007 }
2008 });
2009 let audited_paths = audited_paths.into_inner();
2010
2011 let principal = crate::runtime::impl_core::current_auth_identity_for_audit()
2012 .map(|(user, _role)| user)
2013 .unwrap_or_else(|| "system".to_string());
2014 let outcome = if result.is_ok() { "hit" } else { "miss" };
2015 let target = format!("ai.credential:{provider_token}/{alias_for_audit}");
2016 let paths_json: Vec<crate::serde_json::Value> = audited_paths
2017 .iter()
2018 .map(|(p, hit)| {
2019 crate::serde_json::json!({
2020 "path": p,
2021 "hit": hit,
2022 })
2023 })
2024 .collect();
2025 let details = crate::serde_json::json!({
2026 "provider": provider_token,
2027 "alias": alias_for_audit,
2028 "paths_checked": paths_json,
2029 });
2030 runtime.audit_log().record(
2031 "ai.credential.resolve",
2032 &principal,
2033 &target,
2034 outcome,
2035 details,
2036 );
2037 result
2038}
2039
2040pub fn huggingface_embeddings(
2046 api_key: &str,
2047 model: &str,
2048 inputs: &[String],
2049 api_base: &str,
2050) -> crate::RedDBResult<OpenAiEmbeddingResponse> {
2051 let url = format!("{api_base}/pipeline/feature-extraction/{model}");
2052 let mut embeddings = Vec::with_capacity(inputs.len());
2053
2054 for input in inputs {
2055 let payload = crate::serde_json::json!({ "inputs": input }).to_string_compact();
2056 let (status, body_str) = http_post_json(&url, api_key, &[], payload, 90)
2057 .map_err(|e| crate::RedDBError::Query(format!("HuggingFace API error: {e}")))?;
2058 if !(200..300).contains(&status) {
2059 return Err(crate::RedDBError::Query(format!(
2060 "HuggingFace API error (status {status}): {body_str}"
2061 )));
2062 }
2063 let body: JsonValue = crate::serde_json::from_str(&body_str).map_err(|e| {
2064 crate::RedDBError::Query(format!("HuggingFace response parse error: {e}"))
2065 })?;
2066
2067 let vector: Vec<f32> = match &body {
2069 JsonValue::Array(outer) => outer
2070 .iter()
2071 .filter_map(|v| v.as_f64().map(|n| n as f32))
2072 .collect(),
2073 _ => {
2074 return Err(crate::RedDBError::Query(
2075 "unexpected HuggingFace embedding response format".to_string(),
2076 ))
2077 }
2078 };
2079 embeddings.push(vector);
2080 }
2081
2082 Ok(OpenAiEmbeddingResponse {
2083 provider: "huggingface",
2084 model: model.to_string(),
2085 embeddings,
2086 prompt_tokens: None,
2087 total_tokens: None,
2088 })
2089}
2090
2091pub fn huggingface_prompt(
2093 api_key: &str,
2094 model: &str,
2095 prompt: &str,
2096 temperature: Option<f32>,
2097 max_tokens: Option<usize>,
2098 api_base: &str,
2099) -> crate::RedDBResult<AiPromptResponse> {
2100 let url = format!("{api_base}/models/{model}");
2101 let mut params = Map::new();
2102 if let Some(t) = temperature {
2103 params.insert("temperature".into(), JsonValue::Number(t as f64));
2104 }
2105 params.insert(
2106 "max_new_tokens".into(),
2107 JsonValue::Number(max_tokens.unwrap_or(512) as f64),
2108 );
2109 let payload = crate::serde_json::json!({
2110 "inputs": prompt,
2111 "parameters": JsonValue::Object(params)
2112 });
2113
2114 let (status, body_str) =
2115 http_post_json(&url, api_key, &[], payload.to_string_compact(), 120)
2116 .map_err(|e| crate::RedDBError::Query(format!("HuggingFace API error: {e}")))?;
2117 if !(200..300).contains(&status) {
2118 return Err(crate::RedDBError::Query(format!(
2119 "HuggingFace API error (status {status}): {body_str}"
2120 )));
2121 }
2122 let body: JsonValue = crate::serde_json::from_str(&body_str)
2123 .map_err(|e| crate::RedDBError::Query(format!("HuggingFace response parse error: {e}")))?;
2124
2125 let output_text = match &body {
2126 JsonValue::Array(arr) => arr
2127 .first()
2128 .and_then(|v| v.get("generated_text"))
2129 .and_then(JsonValue::as_str)
2130 .unwrap_or("")
2131 .to_string(),
2132 _ => body
2133 .get("generated_text")
2134 .and_then(JsonValue::as_str)
2135 .unwrap_or("")
2136 .to_string(),
2137 };
2138
2139 Ok(AiPromptResponse {
2140 provider: "huggingface",
2141 model: model.to_string(),
2142 output_text,
2143 output_chunks: None,
2144 prompt_tokens: None,
2145 completion_tokens: None,
2146 total_tokens: None,
2147 stop_reason: None,
2148 })
2149}
2150
/// Error text returned by `local_embeddings_unavailable_error` when the
/// `local-models` feature is not compiled in.
const LOCAL_MODELS_DISABLED_MESSAGE: &str = "local embeddings require the `local-models` feature \
flag at engine build time. Build with: cargo build --features local-models. Alternatively, use \
the 'ollama' provider with a local Ollama server.";

/// Error text returned by `local_embeddings_unavailable_error` when the
/// `local-models` feature is compiled in.
const LOCAL_EMBEDDINGS_NOT_IMPLEMENTED_MESSAGE: &str = "local embeddings are registered by the \
`local-models` feature, but local model artifact execution is not implemented in this slice. \
Alternatively, use the 'ollama' provider with a local Ollama server.";

/// Error text returned by `local_prompt_unavailable_error`.
const LOCAL_PROMPT_OUT_OF_SCOPE_MESSAGE: &str = "local prompt and generation are out of scope for \
the `local-models` feature; the local provider contract is embeddings-only for this slice.";
2165
2166pub fn local_embeddings_unavailable_error() -> crate::RedDBError {
2167 if cfg!(feature = "local-models") {
2168 crate::RedDBError::Query(LOCAL_EMBEDDINGS_NOT_IMPLEMENTED_MESSAGE.to_string())
2169 } else {
2170 crate::RedDBError::FeatureNotEnabled(LOCAL_MODELS_DISABLED_MESSAGE.to_string())
2171 }
2172}
2173
2174pub fn local_prompt_unavailable_error() -> crate::RedDBError {
2175 crate::RedDBError::Query(LOCAL_PROMPT_OUT_OF_SCOPE_MESSAGE.to_string())
2176}
2177
2178pub fn local_embeddings(
2180 _model_id: &str,
2181 _texts: &[String],
2182) -> crate::RedDBResult<OpenAiEmbeddingResponse> {
2183 Err(local_embeddings_unavailable_error())
2184}
2185
2186pub fn local_prompt(_model_id: &str, _prompt: &str) -> crate::RedDBResult<AiPromptResponse> {
2188 Err(local_prompt_unavailable_error())
2189}
2190
2191fn grpc_collect_embedding_inputs(
2204 runtime: &crate::runtime::RedDBRuntime,
2205 payload: &JsonValue,
2206) -> crate::RedDBResult<Vec<String>> {
2207 if let Some(source_query) = payload
2208 .get("source_query")
2209 .and_then(|v| v.as_str())
2210 .map(str::trim)
2211 .filter(|s| !s.is_empty())
2212 {
2213 return grpc_collect_inputs_from_source_query(runtime, payload, source_query);
2214 }
2215
2216 if let Some(arr) = payload.get("inputs").and_then(|v| v.as_array()) {
2217 let mut out = Vec::with_capacity(arr.len());
2218 for (idx, v) in arr.iter().enumerate() {
2219 let text = v.as_str().ok_or_else(|| {
2220 crate::RedDBError::Query(format!("field 'inputs[{idx}]' must be a string"))
2221 })?;
2222 if text.trim().is_empty() {
2223 return Err(crate::RedDBError::Query(format!(
2224 "field 'inputs[{idx}]' cannot be empty"
2225 )));
2226 }
2227 out.push(text.to_string());
2228 }
2229 if out.is_empty() {
2230 return Err(crate::RedDBError::Query(
2231 "field 'inputs' must be a non-empty array of strings".to_string(),
2232 ));
2233 }
2234 return Ok(out);
2235 }
2236
2237 if let Some(single) = payload
2238 .get("input")
2239 .and_then(|v| v.as_str())
2240 .map(str::trim)
2241 .filter(|s| !s.is_empty())
2242 {
2243 return Ok(vec![single.to_string()]);
2244 }
2245
2246 Err(crate::RedDBError::Query(
2247 "provide either 'input', 'inputs', or 'source_query'".to_string(),
2248 ))
2249}
2250
2251fn grpc_collect_inputs_from_source_query(
2252 runtime: &crate::runtime::RedDBRuntime,
2253 payload: &JsonValue,
2254 source_query: &str,
2255) -> crate::RedDBResult<Vec<String>> {
2256 let result = runtime
2257 .execute_query(source_query)
2258 .map_err(|err| crate::RedDBError::Query(format!("source_query failed: {err}")))?;
2259
2260 let source_mode = payload
2261 .get("source_mode")
2262 .and_then(|v| v.as_str())
2263 .map(str::trim)
2264 .filter(|s| !s.is_empty())
2265 .unwrap_or("row")
2266 .to_ascii_lowercase();
2267
2268 let mut out: Vec<String> = Vec::new();
2269 match source_mode.as_str() {
2270 "row" => {
2271 let field = payload
2272 .get("source_field")
2273 .and_then(|v| v.as_str())
2274 .map(str::trim)
2275 .filter(|s| !s.is_empty())
2276 .ok_or_else(|| {
2277 crate::RedDBError::Query(
2278 "field 'source_field' is required when source_mode='row'".to_string(),
2279 )
2280 })?;
2281 for rec in &result.result.records {
2282 for (key, value) in rec.iter_fields() {
2283 if key.as_ref() == field {
2284 if let crate::storage::schema::Value::Text(text) = value {
2285 let trimmed = text.trim();
2286 if !trimmed.is_empty() {
2287 out.push(trimmed.to_string());
2288 }
2289 }
2290 }
2291 }
2292 }
2293 }
2294 "result" => {
2295 for rec in &result.result.records {
2296 for (_, value) in rec.iter_fields() {
2297 if let crate::storage::schema::Value::Text(text) = value {
2298 let trimmed = text.trim();
2299 if !trimmed.is_empty() {
2300 out.push(trimmed.to_string());
2301 }
2302 }
2303 }
2304 }
2305 }
2306 other => {
2307 return Err(crate::RedDBError::Query(format!(
2308 "field 'source_mode' must be 'row' or 'result' (got '{other}')"
2309 )));
2310 }
2311 }
2312
2313 if out.is_empty() {
2314 return Err(crate::RedDBError::Query(
2315 "source_query produced zero non-empty text inputs".to_string(),
2316 ));
2317 }
2318 Ok(out)
2319}
2320
2321pub fn grpc_embeddings(
2343 runtime: &crate::runtime::RedDBRuntime,
2344 payload: &JsonValue,
2345) -> crate::RedDBResult<JsonValue> {
2346 let provider_name = payload
2347 .get("provider")
2348 .and_then(|v| v.as_str())
2349 .map(str::trim)
2350 .filter(|s| !s.is_empty())
2351 .unwrap_or("openai");
2352 let provider = parse_provider(provider_name)?;
2353 match &provider {
2358 AiProvider::Anthropic => {
2359 return Err(crate::RedDBError::Query(
2360 "Anthropic does not offer an embeddings API. \
2361 Re-issue the request against an OpenAI-compatible \
2362 provider (openai, groq, ollama, openrouter, together, \
2363 venice, deepseek), HuggingFace, or a custom base URL — \
2364 RedDB does not silently route embeddings to a \
2365 different provider than the one you named."
2366 .to_string(),
2367 ));
2368 }
2369 AiProvider::Local => {
2370 return grpc_embeddings_local(runtime, payload);
2371 }
2372 _ => {}
2373 }
2374
2375 let inputs: Vec<String> = grpc_collect_embedding_inputs(runtime, payload)?;
2376
2377 let model = payload
2378 .get("model")
2379 .and_then(|v| v.as_str())
2380 .map(str::trim)
2381 .filter(|s| !s.is_empty())
2382 .map(str::to_string)
2383 .or_else(|| {
2384 std::env::var(format!(
2385 "REDDB_{}_EMBEDDING_MODEL",
2386 provider.token().to_ascii_uppercase()
2387 ))
2388 .ok()
2389 })
2390 .or_else(|| std::env::var("REDDB_OPENAI_EMBEDDING_MODEL").ok())
2391 .filter(|v| !v.trim().is_empty())
2392 .unwrap_or_else(|| provider.default_embedding_model().to_string());
2393
2394 let credential = payload
2395 .get("credential")
2396 .and_then(|v| v.as_str())
2397 .map(str::to_string);
2398 let api_key = resolve_api_key_from_runtime(&provider, credential.as_deref(), runtime)?;
2399
2400 let dimensions = payload
2401 .get("dimensions")
2402 .and_then(|v| v.as_i64())
2403 .and_then(|v| usize::try_from(v).ok())
2404 .filter(|v| *v > 0);
2405
2406 let response = match &provider {
2407 AiProvider::HuggingFace => {
2408 huggingface_embeddings(&api_key, &model, &inputs, &provider.resolve_api_base())?
2409 }
2410 _ => {
2411 let transport = crate::runtime::ai::transport::AiTransport::from_runtime(runtime);
2412 let request = OpenAiEmbeddingRequest {
2413 api_key,
2414 model,
2415 inputs,
2416 dimensions,
2417 api_base: provider.resolve_api_base(),
2418 };
2419 crate::runtime::ai::block_on_ai(async move {
2420 openai_embeddings_async(&transport, request).await
2421 })
2422 .and_then(|result| result)?
2423 }
2424 };
2425
2426 let embeddings_json: Vec<JsonValue> = response
2427 .embeddings
2428 .into_iter()
2429 .map(|vec| {
2430 JsonValue::Array(
2431 vec.into_iter()
2432 .map(|f| JsonValue::Number(f as f64))
2433 .collect(),
2434 )
2435 })
2436 .collect();
2437
2438 let mut obj = Map::new();
2439 obj.insert(
2440 "provider".to_string(),
2441 JsonValue::String(response.provider.to_string()),
2442 );
2443 obj.insert("model".to_string(), JsonValue::String(response.model));
2444 obj.insert("embeddings".to_string(), JsonValue::Array(embeddings_json));
2445 if let Some(pt) = response.prompt_tokens {
2446 obj.insert("prompt_tokens".to_string(), JsonValue::Number(pt as f64));
2447 }
2448 if let Some(tt) = response.total_tokens {
2449 obj.insert("total_tokens".to_string(), JsonValue::Number(tt as f64));
2450 }
2451 Ok(JsonValue::Object(obj))
2452}
2453
2454fn grpc_embeddings_local(
2463 runtime: &crate::runtime::RedDBRuntime,
2464 payload: &JsonValue,
2465) -> crate::RedDBResult<JsonValue> {
2466 let model_name = payload
2467 .get("model")
2468 .and_then(|v| v.as_str())
2469 .map(str::trim)
2470 .filter(|s| !s.is_empty())
2471 .ok_or_else(|| {
2472 crate::RedDBError::Query(
2473 "field 'model' is required for the local provider and must be the \
2474 registered local model name (see POST /ai/models)"
2475 .to_string(),
2476 )
2477 })?
2478 .to_string();
2479
2480 let inputs = grpc_collect_embedding_inputs(runtime, payload)?;
2481 let response = crate::runtime::ai::local_embedding::embed_local(runtime, &model_name, inputs)?;
2482
2483 let embeddings_json: Vec<JsonValue> = response
2484 .embeddings
2485 .into_iter()
2486 .map(|vec| {
2487 JsonValue::Array(
2488 vec.into_iter()
2489 .map(|f| JsonValue::Number(f as f64))
2490 .collect(),
2491 )
2492 })
2493 .collect();
2494
2495 let mut obj = Map::new();
2496 obj.insert(
2497 "provider".to_string(),
2498 JsonValue::String(response.provider.to_string()),
2499 );
2500 obj.insert("model".to_string(), JsonValue::String(response.name));
2501 obj.insert(
2502 "model_source".to_string(),
2503 JsonValue::String(response.source),
2504 );
2505 obj.insert(
2506 "model_revision".to_string(),
2507 JsonValue::String(response.revision),
2508 );
2509 obj.insert(
2510 "model_engine".to_string(),
2511 JsonValue::String(response.engine),
2512 );
2513 obj.insert(
2514 "dimensions".to_string(),
2515 JsonValue::Number(response.dimensions as f64),
2516 );
2517 obj.insert("embeddings".to_string(), JsonValue::Array(embeddings_json));
2518 Ok(JsonValue::Object(obj))
2519}
2520
2521pub fn grpc_prompt(
2523 _runtime: &crate::runtime::RedDBRuntime,
2524 _payload: &JsonValue,
2525) -> crate::RedDBResult<JsonValue> {
2526 Err(crate::RedDBError::FeatureNotEnabled(
2527 "AI prompt via gRPC requires HTTP endpoint; use POST /ai/prompt".to_string(),
2528 ))
2529}
2530
2531pub fn grpc_credentials(
2533 _runtime: &crate::runtime::RedDBRuntime,
2534 _payload: &JsonValue,
2535) -> crate::RedDBResult<JsonValue> {
2536 Err(crate::RedDBError::FeatureNotEnabled(
2537 "AI credentials via gRPC requires HTTP endpoint; use POST /ai/credentials".to_string(),
2538 ))
2539}
2540
/// Token accounting attached to OpenAI-compatible responses.
///
/// Every counter is optional; `Default` yields all `None`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct OpenAiCompatUsage {
    /// Tokens consumed by the request input, when reported.
    pub input_tokens: Option<u64>,
    /// Tokens produced in the output, when reported.
    pub output_tokens: Option<u64>,
    /// Total token count, when reported.
    pub total_tokens: Option<u64>,
}
2562
/// Input for [`openai_compat_chat`]: one prompt sent to an
/// OpenAI-compatible `/chat/completions` endpoint.
#[derive(Debug, Clone)]
pub struct OpenAiCompatChatRequest {
    /// Base URL of the API; a trailing `/` is tolerated.
    pub api_base: String,
    /// API key forwarded to the HTTP helper.
    pub api_key: String,
    /// Model identifier; must not be blank.
    pub model: String,
    /// Prompt text; must not be blank.
    pub prompt: String,
    /// Optional sampling temperature.
    pub temperature: Option<f32>,
    /// Optional sampling seed.
    pub seed: Option<u64>,
    /// Optional cap on generated tokens.
    pub max_output_tokens: Option<usize>,
    /// Additional `(name, value)` HTTP headers to send.
    pub extra_headers: Vec<(String, String)>,
}
2574
2575#[derive(Debug, Clone)]
2576pub struct OpenAiCompatChatResponse {
2577 pub model: String,
2578 pub output_text: String,
2579 pub stop_reason: Option<String>,
2580 pub usage: OpenAiCompatUsage,
2581}
2582
/// Input for [`openai_compat_embeddings`]: a batch of texts sent to an
/// OpenAI-compatible `/embeddings` endpoint.
#[derive(Debug, Clone)]
pub struct OpenAiCompatEmbeddingsRequest {
    /// Base URL of the API; a trailing `/` is tolerated.
    pub api_base: String,
    /// API key forwarded to the HTTP helper.
    pub api_key: String,
    /// Embedding model identifier; must not be blank.
    pub model: String,
    /// Texts to embed; must contain at least one entry.
    pub inputs: Vec<String>,
    /// Optional requested embedding dimensionality.
    pub dimensions: Option<usize>,
    /// Additional `(name, value)` HTTP headers to send.
    pub extra_headers: Vec<(String, String)>,
}
2592
2593#[derive(Debug, Clone)]
2594pub struct OpenAiCompatEmbeddingsResponse {
2595 pub model: String,
2596 pub embeddings: Vec<Vec<f32>>,
2597 pub usage: OpenAiCompatUsage,
2598}
2599
/// Borrows owned `(name, value)` header pairs as `(&str, &str)` pairs, in the
/// same order, which is the shape `http_post_json` expects.
fn extra_header_refs(headers: &[(String, String)]) -> Vec<(&str, &str)> {
    let mut borrowed = Vec::with_capacity(headers.len());
    for (name, value) in headers {
        borrowed.push((name.as_str(), value.as_str()));
    }
    borrowed
}
2606
2607pub fn openai_compat_chat(
2615 request: OpenAiCompatChatRequest,
2616) -> RedDBResult<OpenAiCompatChatResponse> {
2617 if request.model.trim().is_empty() {
2618 return Err(RedDBError::Query(
2619 "openai-compat: model cannot be empty".to_string(),
2620 ));
2621 }
2622 if request.prompt.trim().is_empty() {
2623 return Err(RedDBError::Query(
2624 "openai-compat: prompt cannot be empty".to_string(),
2625 ));
2626 }
2627
2628 let url = format!(
2629 "{}/chat/completions",
2630 request.api_base.trim_end_matches('/')
2631 );
2632 let payload = build_openai_prompt_payload(
2633 &request.model,
2634 &request.prompt,
2635 request.temperature,
2636 request.seed,
2637 request.max_output_tokens,
2638 false,
2639 );
2640
2641 let extra = extra_header_refs(&request.extra_headers);
2642 let (status, body) = http_post_json(&url, &request.api_key, &extra, payload, 120)
2643 .map_err(|err| RedDBError::Query(format!("openai-compat transport error: {err}")))?;
2644
2645 if !(200..300).contains(&status) {
2646 let message = openai_error_message(&body).unwrap_or_else(|| {
2647 if body.trim().is_empty() {
2648 "openai-compat chat request failed".to_string()
2649 } else {
2650 body.clone()
2651 }
2652 });
2653 return Err(RedDBError::Query(format!(
2654 "openai-compat chat request failed (status {status}): {message}"
2655 )));
2656 }
2657
2658 let parsed = parse_openai_prompt_response(&body, &request.model)?;
2659 Ok(OpenAiCompatChatResponse {
2660 model: parsed.model,
2661 output_text: parsed.output_text,
2662 stop_reason: parsed.stop_reason,
2663 usage: OpenAiCompatUsage {
2664 input_tokens: parsed.prompt_tokens,
2665 output_tokens: parsed.completion_tokens,
2666 total_tokens: parsed.total_tokens,
2667 },
2668 })
2669}
2670
2671pub fn openai_compat_embeddings(
2673 request: OpenAiCompatEmbeddingsRequest,
2674) -> RedDBResult<OpenAiCompatEmbeddingsResponse> {
2675 if request.model.trim().is_empty() {
2676 return Err(RedDBError::Query(
2677 "openai-compat: embedding model cannot be empty".to_string(),
2678 ));
2679 }
2680 if request.inputs.is_empty() {
2681 return Err(RedDBError::Query(
2682 "openai-compat: at least one input is required".to_string(),
2683 ));
2684 }
2685
2686 let url = format!("{}/embeddings", request.api_base.trim_end_matches('/'));
2687 let payload =
2688 build_openai_embedding_payload(&request.model, &request.inputs, request.dimensions);
2689
2690 let extra = extra_header_refs(&request.extra_headers);
2691 let (status, body) = http_post_json(&url, &request.api_key, &extra, payload, 90)
2692 .map_err(|err| RedDBError::Query(format!("openai-compat transport error: {err}")))?;
2693
2694 if !(200..300).contains(&status) {
2695 let message = openai_error_message(&body).unwrap_or_else(|| {
2696 if body.trim().is_empty() {
2697 "openai-compat embeddings request failed".to_string()
2698 } else {
2699 body.clone()
2700 }
2701 });
2702 return Err(RedDBError::Query(format!(
2703 "openai-compat embeddings request failed (status {status}): {message}"
2704 )));
2705 }
2706
2707 let parsed = parse_openai_embedding_response(&body)?;
2708 Ok(OpenAiCompatEmbeddingsResponse {
2709 model: parsed.model,
2710 embeddings: parsed.embeddings,
2711 usage: OpenAiCompatUsage {
2712 input_tokens: parsed.prompt_tokens,
2713 output_tokens: None,
2714 total_tokens: parsed.total_tokens,
2715 },
2716 })
2717}
2718
/// Globally configured AI provider mode.
///
/// Each variant has a canonical text token, returned by
/// [`AiProviderMode::token`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AiProviderMode {
    /// Token `"openai-compat"`.
    OpenAiCompat,
    /// Token `"openai-native"`.
    OpenAiNative,
    /// Token `"anthropic-native"`.
    AnthropicNative,
}

impl AiProviderMode {
    /// Returns the canonical, hyphenated token for this mode.
    pub fn token(&self) -> &'static str {
        match *self {
            AiProviderMode::OpenAiCompat => "openai-compat",
            AiProviderMode::OpenAiNative => "openai-native",
            AiProviderMode::AnthropicNative => "anthropic-native",
        }
    }
}
2749
2750pub fn parse_provider_mode(name: &str) -> Option<AiProviderMode> {
2752 match name.trim().to_ascii_lowercase().as_str() {
2753 "openai-compat" | "openai_compat" | "openaicompat" => Some(AiProviderMode::OpenAiCompat),
2754 "openai-native" | "openai_native" | "openainative" => Some(AiProviderMode::OpenAiNative),
2755 "anthropic-native" | "anthropic_native" | "anthropicnative" => {
2756 Some(AiProviderMode::AnthropicNative)
2757 }
2758 _ => None,
2759 }
2760}
2761
2762pub fn resolve_provider_mode<F>(kv_getter: &F) -> Option<AiProviderMode>
2768where
2769 F: Fn(&str) -> crate::RedDBResult<Option<String>>,
2770{
2771 if let Ok(value) = std::env::var("REDDB_AI_PROVIDER_MODE") {
2772 if let Some(mode) = parse_provider_mode(&value) {
2773 return Some(mode);
2774 }
2775 }
2776 if let Ok(Some(value)) = kv_getter("red.config.ai.provider") {
2777 if let Some(mode) = parse_provider_mode(&value) {
2778 return Some(mode);
2779 }
2780 }
2781 None
2782}
2783
2784pub fn provider_mode_to_provider(mode: AiProviderMode) -> AiProvider {
2788 match mode {
2789 AiProviderMode::OpenAiNative => AiProvider::OpenAi,
2790 AiProviderMode::AnthropicNative => AiProvider::Anthropic,
2791 AiProviderMode::OpenAiCompat => AiProvider::Custom(String::new()),
2792 }
2793}