1use std::io::BufRead;
7use std::time::Duration;
8
9use crate::json::{parse_json, Map, Value as JsonValue};
10use crate::{RedDBError, RedDBResult};
11
12fn http_post_json(
18 url: &str,
19 api_key: &str,
20 extra_headers: &[(&str, &str)],
21 payload: String,
22 read_timeout_secs: u64,
23) -> Result<(u16, String), String> {
24 let agent: ureq::Agent = ureq::Agent::config_builder()
25 .timeout_connect(Some(Duration::from_secs(10)))
26 .timeout_send_request(Some(Duration::from_secs(30)))
27 .timeout_recv_response(Some(Duration::from_secs(read_timeout_secs)))
28 .timeout_recv_body(Some(Duration::from_secs(read_timeout_secs)))
29 .http_status_as_error(false)
30 .build()
31 .into();
32
33 let mut req = agent
34 .post(url)
35 .header("Content-Type", "application/json")
36 .header("Accept", "application/json");
37 for (k, v) in extra_headers {
38 req = req.header(*k, *v);
39 }
40 let trimmed_key = api_key.trim();
41 if !trimmed_key.is_empty() {
42 req = req.header("Authorization", &format!("Bearer {}", trimmed_key));
43 }
44
45 match req.send(payload) {
46 Ok(mut resp) => {
47 let status = resp.status().as_u16();
48 let body = resp
49 .body_mut()
50 .read_to_string()
51 .map_err(|err| format!("failed to read response body: {err}"))?;
52 Ok((status, body))
53 }
54 Err(err) => Err(format!("{err}")),
55 }
56}
57
/// Default OpenAI embedding model name; also used as the fallback model label when an
/// embeddings response carries no `model` field.
pub const DEFAULT_OPENAI_EMBEDDING_MODEL: &str = "text-embedding-3-small";
/// Default OpenAI API base URL (endpoint paths such as `/embeddings` are appended to a base).
pub const DEFAULT_OPENAI_API_BASE: &str = "https://api.openai.com/v1";
/// Default OpenAI model name for prompt (chat completion) requests.
pub const DEFAULT_OPENAI_PROMPT_MODEL: &str = "gpt-4.1-mini";
/// Default Anthropic model name for prompt requests.
pub const DEFAULT_ANTHROPIC_PROMPT_MODEL: &str = "claude-3-5-haiku-latest";
/// Default Anthropic API base URL (`/messages` is appended to a base).
pub const DEFAULT_ANTHROPIC_API_BASE: &str = "https://api.anthropic.com/v1";
/// Default value for the `anthropic-version` request header.
pub const DEFAULT_ANTHROPIC_VERSION: &str = "2023-06-01";
64
/// Parameters for an OpenAI-compatible `/embeddings` request.
#[derive(Debug, Clone)]
pub struct OpenAiEmbeddingRequest {
    /// API key; sent as a `Bearer` authorization header when non-empty after trimming.
    pub api_key: String,
    /// Embedding model name; the request functions reject a blank value.
    pub model: String,
    /// Texts to embed; the request functions require at least one entry.
    pub inputs: Vec<String>,
    /// Optional value for the `dimensions` payload field (omitted when `None`).
    pub dimensions: Option<usize>,
    /// Base URL; trailing `/` characters are stripped before `/embeddings` is appended.
    pub api_base: String,
}
73
/// Parsed result of an OpenAI-compatible embeddings response.
#[derive(Debug, Clone)]
pub struct OpenAiEmbeddingResponse {
    /// Provider label; the parser in this file always sets it to `"openai"`.
    pub provider: &'static str,
    /// Model name reported by the response, or the default embedding model when absent.
    pub model: String,
    /// One vector per returned item, ordered by the item's `index` field.
    pub embeddings: Vec<Vec<f32>>,
    /// `usage.prompt_tokens` from the response, when present and non-negative.
    pub prompt_tokens: Option<u64>,
    /// `usage.total_tokens` from the response, when present and non-negative.
    pub total_tokens: Option<u64>,
}
82
/// Parameters for an OpenAI-compatible `/chat/completions` request with a single user message.
#[derive(Debug, Clone)]
pub struct OpenAiPromptRequest {
    /// API key used for the `Bearer` authorization header.
    pub api_key: String,
    /// Chat model name; the request functions reject a blank value.
    pub model: String,
    /// User prompt text; the request functions reject a blank value.
    pub prompt: String,
    /// Optional `temperature` payload field (omitted when `None`).
    pub temperature: Option<f32>,
    /// Optional `seed` payload field (omitted when `None`).
    pub seed: Option<u64>,
    /// Optional output token limit, sent as the `max_tokens` payload field.
    pub max_output_tokens: Option<usize>,
    /// Base URL; trailing `/` characters are stripped before `/chat/completions` is appended.
    pub api_base: String,
    /// Whether to request a streamed response. Read by `openai_prompt_async`; `openai_prompt`
    /// always sends a non-streaming request and `openai_prompt_streaming` always streams.
    pub stream: bool,
}
94
/// Parameters for an Anthropic `/messages` request with a single user message.
#[derive(Debug, Clone)]
pub struct AnthropicPromptRequest {
    /// API key sent in the `x-api-key` header; the request functions reject a blank value.
    pub api_key: String,
    /// Model name; the request functions reject a blank value.
    pub model: String,
    /// User prompt text; the request functions reject a blank value.
    pub prompt: String,
    /// Optional `temperature` payload field (omitted when `None`).
    pub temperature: Option<f32>,
    /// Output token limit sent as `max_tokens`; the payload builder uses 512 when `None`.
    pub max_output_tokens: Option<usize>,
    /// Base URL; trailing `/` characters are stripped before `/messages` is appended.
    pub api_base: String,
    /// Value for the `anthropic-version` request header.
    pub anthropic_version: String,
}
105
/// Provider-independent result of a prompt request.
#[derive(Debug, Clone)]
pub struct AiPromptResponse {
    /// Provider label set by the parser (`"openai"` or `"anthropic"` in this file).
    pub provider: &'static str,
    /// Model name reported by the response, or the requested model when absent.
    pub model: String,
    /// Generated text.
    pub output_text: String,
    /// Individual text chunks for streamed responses; `None` for non-streamed responses.
    pub output_chunks: Option<Vec<String>>,
    /// Prompt/input token count reported by the provider, when available.
    pub prompt_tokens: Option<u64>,
    /// Completion/output token count reported by the provider, when available.
    pub completion_tokens: Option<u64>,
    /// Total token count: reported by the provider, or the sum of prompt and completion
    /// counts when both are known.
    pub total_tokens: Option<u64>,
    /// Provider's finish/stop reason, when present.
    pub stop_reason: Option<String>,
}
117
118#[deprecated(
119 since = "1.0.0",
120 note = "use AiBatchClient::embed_batch for embeddings or openai_embeddings_async with AiTransport when token usage metadata is required"
121)]
122pub fn openai_embeddings(request: OpenAiEmbeddingRequest) -> RedDBResult<OpenAiEmbeddingResponse> {
123 if request.model.trim().is_empty() {
124 return Err(RedDBError::Query(
125 "OpenAI embedding model cannot be empty".to_string(),
126 ));
127 }
128 if request.inputs.is_empty() {
129 return Err(RedDBError::Query(
130 "at least one input is required for embeddings".to_string(),
131 ));
132 }
133
134 let url = format!("{}/embeddings", request.api_base.trim_end_matches('/'));
135 let payload =
136 build_openai_embedding_payload(&request.model, &request.inputs, request.dimensions);
137
138 let (status, body) = http_post_json(&url, &request.api_key, &[], payload, 90)
139 .map_err(|err| RedDBError::Query(format!("OpenAI transport error: {err}")))?;
140
141 if !(200..300).contains(&status) {
142 let message = openai_error_message(&body)
143 .unwrap_or_else(|| "OpenAI embeddings request failed".to_string());
144 return Err(RedDBError::Query(format!(
145 "OpenAI embeddings request failed (status {status}): {message}"
146 )));
147 }
148
149 parse_openai_embedding_response(&body)
150}
151
152#[deprecated(since = "1.0.0", note = "use openai_prompt_async with AiTransport")]
153pub fn openai_prompt(request: OpenAiPromptRequest) -> RedDBResult<AiPromptResponse> {
154 if request.model.trim().is_empty() {
155 return Err(RedDBError::Query(
156 "OpenAI prompt model cannot be empty".to_string(),
157 ));
158 }
159 if request.prompt.trim().is_empty() {
160 return Err(RedDBError::Query("prompt cannot be empty".to_string()));
161 }
162
163 let url = format!(
164 "{}/chat/completions",
165 request.api_base.trim_end_matches('/')
166 );
167 let payload = build_openai_prompt_payload(
168 &request.model,
169 &request.prompt,
170 request.temperature,
171 request.seed,
172 request.max_output_tokens,
173 false,
174 );
175
176 let (status, body) = http_post_json(&url, &request.api_key, &[], payload, 120)
177 .map_err(|err| RedDBError::Query(format!("OpenAI transport error: {err}")))?;
178
179 if !(200..300).contains(&status) {
180 let message = openai_error_message(&body)
181 .unwrap_or_else(|| "OpenAI prompt request failed".to_string());
182 return Err(RedDBError::Query(format!(
183 "OpenAI prompt request failed (status {status}): {message}"
184 )));
185 }
186
187 parse_openai_prompt_response(&body, &request.model)
188}
189
190#[deprecated(since = "1.0.0", note = "use anthropic_prompt_async with AiTransport")]
191pub fn anthropic_prompt(request: AnthropicPromptRequest) -> RedDBResult<AiPromptResponse> {
192 if request.api_key.trim().is_empty() {
193 return Err(RedDBError::Query(
194 "Anthropic API key cannot be empty".to_string(),
195 ));
196 }
197 if request.model.trim().is_empty() {
198 return Err(RedDBError::Query(
199 "Anthropic model cannot be empty".to_string(),
200 ));
201 }
202 if request.prompt.trim().is_empty() {
203 return Err(RedDBError::Query("prompt cannot be empty".to_string()));
204 }
205
206 let url = format!("{}/messages", request.api_base.trim_end_matches('/'));
207 let payload = build_anthropic_prompt_payload(
208 &request.model,
209 &request.prompt,
210 request.temperature,
211 request.max_output_tokens,
212 );
213
214 let extra = [
219 ("x-api-key", request.api_key.as_str()),
220 ("anthropic-version", request.anthropic_version.as_str()),
221 ];
222 let (status, body) = http_post_json(&url, "", &extra, payload, 120)
223 .map_err(|err| RedDBError::Query(format!("Anthropic transport error: {err}")))?;
224
225 if !(200..300).contains(&status) {
226 let message = anthropic_error_message(&body)
227 .unwrap_or_else(|| "Anthropic prompt request failed".to_string());
228 return Err(RedDBError::Query(format!(
229 "Anthropic prompt request failed (status {status}): {message}"
230 )));
231 }
232
233 parse_anthropic_prompt_response(&body, &request.model)
234}
235
236pub async fn openai_embeddings_async(
241 transport: &crate::runtime::ai::transport::AiTransport,
242 request: OpenAiEmbeddingRequest,
243) -> RedDBResult<OpenAiEmbeddingResponse> {
244 if request.model.trim().is_empty() {
245 return Err(RedDBError::Query(
246 "OpenAI embedding model cannot be empty".to_string(),
247 ));
248 }
249 if request.inputs.is_empty() {
250 return Err(RedDBError::Query(
251 "at least one input is required for embeddings".to_string(),
252 ));
253 }
254
255 let url = format!("{}/embeddings", request.api_base.trim_end_matches('/'));
256 let payload =
257 build_openai_embedding_payload(&request.model, &request.inputs, request.dimensions);
258 let mut http_req =
259 crate::runtime::ai::transport::AiHttpRequest::post_json("openai-compatible", url, payload);
260 let trimmed_key = request.api_key.trim();
261 if !trimmed_key.is_empty() {
262 http_req = http_req.header("authorization", format!("Bearer {}", trimmed_key));
263 }
264
265 let response = transport
266 .request(http_req)
267 .await
268 .map_err(|e| RedDBError::Query(e.to_string()))?;
269
270 parse_openai_embedding_response(&response.body)
271}
272
273pub async fn openai_prompt_async(
278 transport: &crate::runtime::ai::transport::AiTransport,
279 request: OpenAiPromptRequest,
280) -> RedDBResult<AiPromptResponse> {
281 if request.model.trim().is_empty() {
282 return Err(RedDBError::Query(
283 "OpenAI prompt model cannot be empty".to_string(),
284 ));
285 }
286 if request.prompt.trim().is_empty() {
287 return Err(RedDBError::Query("prompt cannot be empty".to_string()));
288 }
289
290 let url = format!(
291 "{}/chat/completions",
292 request.api_base.trim_end_matches('/')
293 );
294 let payload = build_openai_prompt_payload(
295 &request.model,
296 &request.prompt,
297 request.temperature,
298 request.seed,
299 request.max_output_tokens,
300 request.stream,
301 );
302 let http_req = crate::runtime::ai::transport::AiHttpRequest::post_json("openai", url, payload)
303 .model(request.model.clone())
304 .header("authorization", format!("Bearer {}", request.api_key));
305
306 let response = transport
307 .request(http_req)
308 .await
309 .map_err(|e| RedDBError::Query(e.to_string()))?;
310
311 if request.stream {
312 parse_openai_streaming_prompt_response(&response.body, &request.model)
313 } else {
314 parse_openai_prompt_response(&response.body, &request.model)
315 }
316}
317
318pub fn openai_prompt_streaming(
324 request: OpenAiPromptRequest,
325 mut on_chunk: impl FnMut(&str) -> RedDBResult<()>,
326) -> RedDBResult<AiPromptResponse> {
327 if request.model.trim().is_empty() {
328 return Err(RedDBError::Query(
329 "OpenAI prompt model cannot be empty".to_string(),
330 ));
331 }
332 if request.prompt.trim().is_empty() {
333 return Err(RedDBError::Query("prompt cannot be empty".to_string()));
334 }
335
336 let url = format!(
337 "{}/chat/completions",
338 request.api_base.trim_end_matches('/')
339 );
340 let payload = build_openai_prompt_payload(
341 &request.model,
342 &request.prompt,
343 request.temperature,
344 request.seed,
345 request.max_output_tokens,
346 true,
347 );
348
349 let agent: ureq::Agent = ureq::Agent::config_builder()
350 .timeout_connect(Some(Duration::from_secs(10)))
351 .timeout_send_request(Some(Duration::from_secs(30)))
352 .timeout_recv_response(Some(Duration::from_secs(120)))
353 .timeout_recv_body(Some(Duration::from_secs(120)))
354 .http_status_as_error(false)
355 .build()
356 .into();
357
358 let mut req = agent
359 .post(&url)
360 .header("content-type", "application/json")
361 .header("accept", "text/event-stream");
362 let trimmed_key = request.api_key.trim();
363 if !trimmed_key.is_empty() {
364 req = req.header("authorization", &format!("Bearer {}", trimmed_key));
365 }
366
367 let mut response = req
368 .send(payload)
369 .map_err(|err| RedDBError::Query(format!("OpenAI transport error: {err}")))?;
370 let status = response.status().as_u16();
371 if !(200..300).contains(&status) {
372 let body = response
373 .body_mut()
374 .read_to_string()
375 .unwrap_or_else(|err| format!("failed to read response body: {err}"));
376 let message = openai_error_message(&body)
377 .unwrap_or_else(|| "OpenAI prompt request failed".to_string());
378 return Err(RedDBError::Query(format!(
379 "OpenAI prompt request failed (status {status}): {message}"
380 )));
381 }
382
383 let mut model = request.model;
384 let mut chunks = Vec::new();
385 let mut prompt_tokens = None;
386 let mut completion_tokens = None;
387 let mut total_tokens = None;
388 let mut stop_reason = None;
389
390 let mut reader = std::io::BufReader::new(response.body_mut().as_reader());
391 let mut line = String::new();
392 loop {
393 line.clear();
394 let read = reader.read_line(&mut line).map_err(|err| {
395 RedDBError::Query(format!("failed to read OpenAI streaming response: {err}"))
396 })?;
397 if read == 0 {
398 break;
399 }
400
401 let trimmed = line.trim();
402 let Some(data) = trimmed.strip_prefix("data:") else {
403 continue;
404 };
405 let data = data.trim();
406 if data.is_empty() {
407 continue;
408 }
409 if data == "[DONE]" {
410 break;
411 }
412
413 let parsed = parse_json(data).map_err(|err| {
414 RedDBError::Query(format!(
415 "invalid OpenAI streaming prompt JSON response: {err}"
416 ))
417 })?;
418 let json = JsonValue::from(parsed);
419 if let Some(value) = json.get("model").and_then(JsonValue::as_str) {
420 model = value.to_string();
421 }
422 if let Some(usage) = json.get("usage") {
423 prompt_tokens = usage
424 .get("prompt_tokens")
425 .and_then(JsonValue::as_i64)
426 .and_then(|value| u64::try_from(value).ok())
427 .or(prompt_tokens);
428 completion_tokens = usage
429 .get("completion_tokens")
430 .and_then(JsonValue::as_i64)
431 .and_then(|value| u64::try_from(value).ok())
432 .or(completion_tokens);
433 total_tokens = usage
434 .get("total_tokens")
435 .and_then(JsonValue::as_i64)
436 .and_then(|value| u64::try_from(value).ok())
437 .or(total_tokens);
438 }
439
440 let Some(choices) = json.get("choices").and_then(JsonValue::as_array) else {
441 continue;
442 };
443 let Some(first_choice) = choices.first() else {
444 continue;
445 };
446 if let Some(reason) = first_choice
447 .get("finish_reason")
448 .and_then(JsonValue::as_str)
449 {
450 stop_reason = Some(reason.to_string());
451 }
452 if let Some(text) = first_choice
453 .get("delta")
454 .and_then(|delta| delta.get("content"))
455 .and_then(JsonValue::as_str)
456 {
457 if !text.is_empty() {
458 on_chunk(text)?;
459 chunks.push(text.to_string());
460 }
461 }
462 }
463
464 if chunks.is_empty() {
465 return Err(RedDBError::Query(
466 "OpenAI streaming prompt response missing text content".to_string(),
467 ));
468 }
469
470 let output_text = chunks.concat();
471 let total_tokens = total_tokens.or_else(|| match (prompt_tokens, completion_tokens) {
472 (Some(prompt), Some(completion)) => Some(prompt.saturating_add(completion)),
473 _ => None,
474 });
475
476 Ok(AiPromptResponse {
477 provider: "openai",
478 model,
479 output_text,
480 output_chunks: Some(chunks),
481 prompt_tokens,
482 completion_tokens,
483 total_tokens,
484 stop_reason,
485 })
486}
487
488pub async fn anthropic_prompt_async(
493 transport: &crate::runtime::ai::transport::AiTransport,
494 request: AnthropicPromptRequest,
495) -> RedDBResult<AiPromptResponse> {
496 if request.api_key.trim().is_empty() {
497 return Err(RedDBError::Query(
498 "Anthropic API key cannot be empty".to_string(),
499 ));
500 }
501 if request.model.trim().is_empty() {
502 return Err(RedDBError::Query(
503 "Anthropic model cannot be empty".to_string(),
504 ));
505 }
506 if request.prompt.trim().is_empty() {
507 return Err(RedDBError::Query("prompt cannot be empty".to_string()));
508 }
509
510 let url = format!("{}/messages", request.api_base.trim_end_matches('/'));
511 let payload = build_anthropic_prompt_payload(
512 &request.model,
513 &request.prompt,
514 request.temperature,
515 request.max_output_tokens,
516 );
517 let http_req =
518 crate::runtime::ai::transport::AiHttpRequest::post_json("anthropic", url, payload)
519 .model(request.model.clone())
520 .header("x-api-key", request.api_key)
521 .header("anthropic-version", request.anthropic_version);
522
523 let response = transport
524 .request(http_req)
525 .await
526 .map_err(|e| RedDBError::Query(e.to_string()))?;
527
528 parse_anthropic_prompt_response(&response.body, &request.model)
529}
530
/// Builds the embeddings request JSON for `model` and `inputs` without a `dimensions`
/// field; crate-visible wrapper around `build_openai_embedding_payload`.
pub(crate) fn build_embedding_payload(model: &str, inputs: &[String]) -> String {
    build_openai_embedding_payload(model, inputs, None)
}
535
/// Parses an embeddings response body and returns only the embedding vectors.
///
/// Parse failures are returned as the error's string rendering.
pub(crate) fn parse_embedding_vectors(body: &str) -> Result<Vec<Vec<f32>>, String> {
    parse_openai_embedding_response(body)
        .map(|r| r.embeddings)
        .map_err(|e| e.to_string())
}
542
/// Parses an embeddings response body into the full `OpenAiEmbeddingResponse`.
///
/// Parse failures are returned as the error's string rendering.
pub(crate) fn parse_embedding_response(body: &str) -> Result<OpenAiEmbeddingResponse, String> {
    parse_openai_embedding_response(body).map_err(|e| e.to_string())
}
546
547fn build_openai_embedding_payload(
548 model: &str,
549 inputs: &[String],
550 dimensions: Option<usize>,
551) -> String {
552 let mut object = Map::new();
553 object.insert("model".to_string(), JsonValue::String(model.to_string()));
554 if inputs.len() == 1 {
555 object.insert("input".to_string(), JsonValue::String(inputs[0].clone()));
556 } else {
557 object.insert(
558 "input".to_string(),
559 JsonValue::Array(inputs.iter().cloned().map(JsonValue::String).collect()),
560 );
561 }
562 if let Some(dimensions) = dimensions {
563 object.insert(
564 "dimensions".to_string(),
565 JsonValue::Number(dimensions as f64),
566 );
567 }
568 object.insert(
569 "encoding_format".to_string(),
570 JsonValue::String("float".to_string()),
571 );
572 JsonValue::Object(object).to_string_compact()
573}
574
/// Extracts the `error.message` text from an OpenAI error response body, if any.
/// Delegates to `provider_error_message`.
fn openai_error_message(body: &str) -> Option<String> {
    provider_error_message(body)
}
578
/// Extracts the `error.message` text from an Anthropic error response body, if any.
/// Delegates to `provider_error_message`.
fn anthropic_error_message(body: &str) -> Option<String> {
    provider_error_message(body)
}
582
583fn provider_error_message(body: &str) -> Option<String> {
584 let parsed = parse_json(body).ok().map(JsonValue::from)?;
585 let error = parsed.get("error")?;
586 if let Some(message) = error.get("message").and_then(JsonValue::as_str) {
587 let trimmed = message.trim();
588 if !trimmed.is_empty() {
589 return Some(trimmed.to_string());
590 }
591 }
592 None
593}
594
595fn build_openai_prompt_payload(
596 model: &str,
597 prompt: &str,
598 temperature: Option<f32>,
599 seed: Option<u64>,
600 max_output_tokens: Option<usize>,
601 stream: bool,
602) -> String {
603 let mut object = Map::new();
604 object.insert("model".to_string(), JsonValue::String(model.to_string()));
605
606 let mut message = Map::new();
607 message.insert("role".to_string(), JsonValue::String("user".to_string()));
608 message.insert("content".to_string(), JsonValue::String(prompt.to_string()));
609 object.insert(
610 "messages".to_string(),
611 JsonValue::Array(vec![JsonValue::Object(message)]),
612 );
613
614 if let Some(temperature) = temperature {
615 object.insert(
616 "temperature".to_string(),
617 JsonValue::Number(temperature as f64),
618 );
619 }
620
621 if let Some(seed) = seed {
622 object.insert("seed".to_string(), JsonValue::Number(seed as f64));
623 }
624
625 if let Some(max_output_tokens) = max_output_tokens {
626 object.insert(
627 "max_tokens".to_string(),
628 JsonValue::Number(max_output_tokens as f64),
629 );
630 }
631
632 if stream {
633 object.insert("stream".to_string(), JsonValue::Bool(true));
634 let mut options = Map::new();
635 options.insert("include_usage".to_string(), JsonValue::Bool(true));
636 object.insert("stream_options".to_string(), JsonValue::Object(options));
637 }
638
639 JsonValue::Object(object).to_string_compact()
640}
641
642fn build_anthropic_prompt_payload(
643 model: &str,
644 prompt: &str,
645 temperature: Option<f32>,
646 max_output_tokens: Option<usize>,
647) -> String {
648 let mut object = Map::new();
649 object.insert("model".to_string(), JsonValue::String(model.to_string()));
650 object.insert(
651 "max_tokens".to_string(),
652 JsonValue::Number(max_output_tokens.unwrap_or(512) as f64),
653 );
654
655 let mut message = Map::new();
656 message.insert("role".to_string(), JsonValue::String("user".to_string()));
657 message.insert("content".to_string(), JsonValue::String(prompt.to_string()));
658 object.insert(
659 "messages".to_string(),
660 JsonValue::Array(vec![JsonValue::Object(message)]),
661 );
662
663 if let Some(temperature) = temperature {
664 object.insert(
665 "temperature".to_string(),
666 JsonValue::Number(temperature as f64),
667 );
668 }
669
670 JsonValue::Object(object).to_string_compact()
671}
672
673fn extract_text_from_parts(parts: &[JsonValue]) -> Option<String> {
674 let mut chunks = Vec::new();
675 for part in parts {
676 if let Some(text) = part.as_str() {
677 let trimmed = text.trim();
678 if !trimmed.is_empty() {
679 chunks.push(trimmed.to_string());
680 }
681 continue;
682 }
683
684 let Some(object) = part.as_object() else {
685 continue;
686 };
687 let Some(text) = object.get("text").and_then(JsonValue::as_str) else {
688 continue;
689 };
690 let trimmed = text.trim();
691 if !trimmed.is_empty() {
692 chunks.push(trimmed.to_string());
693 }
694 }
695
696 if chunks.is_empty() {
697 None
698 } else {
699 Some(chunks.join("\n\n"))
700 }
701}
702
703fn parse_openai_prompt_response(
704 body: &str,
705 requested_model: &str,
706) -> RedDBResult<AiPromptResponse> {
707 let parsed = parse_json(body)
708 .map_err(|err| RedDBError::Query(format!("invalid OpenAI prompt JSON response: {err}")))?;
709 let json = JsonValue::from(parsed);
710
711 let model = json
712 .get("model")
713 .and_then(JsonValue::as_str)
714 .unwrap_or(requested_model)
715 .to_string();
716
717 let Some(choices) = json.get("choices").and_then(JsonValue::as_array) else {
718 return Err(RedDBError::Query(
719 "OpenAI prompt response missing 'choices' array".to_string(),
720 ));
721 };
722 let Some(first_choice) = choices.first() else {
723 return Err(RedDBError::Query(
724 "OpenAI prompt response contains no choices".to_string(),
725 ));
726 };
727
728 let output_text = first_choice
729 .get("message")
730 .and_then(|message| {
731 if let Some(text) = message.get("content").and_then(JsonValue::as_str) {
732 let trimmed = text.trim();
733 if !trimmed.is_empty() {
734 return Some(trimmed.to_string());
735 }
736 }
737 message
738 .get("content")
739 .and_then(JsonValue::as_array)
740 .and_then(extract_text_from_parts)
741 })
742 .ok_or_else(|| {
743 RedDBError::Query("OpenAI prompt response missing text content".to_string())
744 })?;
745
746 let prompt_tokens = json
747 .get("usage")
748 .and_then(|usage| usage.get("prompt_tokens"))
749 .and_then(JsonValue::as_i64)
750 .and_then(|value| u64::try_from(value).ok());
751 let completion_tokens = json
752 .get("usage")
753 .and_then(|usage| usage.get("completion_tokens"))
754 .and_then(JsonValue::as_i64)
755 .and_then(|value| u64::try_from(value).ok());
756 let total_tokens = json
757 .get("usage")
758 .and_then(|usage| usage.get("total_tokens"))
759 .and_then(JsonValue::as_i64)
760 .and_then(|value| u64::try_from(value).ok())
761 .or_else(|| match (prompt_tokens, completion_tokens) {
762 (Some(prompt), Some(completion)) => Some(prompt.saturating_add(completion)),
763 _ => None,
764 });
765
766 let stop_reason = first_choice
767 .get("finish_reason")
768 .and_then(JsonValue::as_str)
769 .map(str::to_string);
770
771 Ok(AiPromptResponse {
772 provider: "openai",
773 model,
774 output_text,
775 output_chunks: None,
776 prompt_tokens,
777 completion_tokens,
778 total_tokens,
779 stop_reason,
780 })
781}
782
783fn parse_openai_streaming_prompt_response(
784 body: &str,
785 requested_model: &str,
786) -> RedDBResult<AiPromptResponse> {
787 let mut model = requested_model.to_string();
788 let mut chunks = Vec::new();
789 let mut prompt_tokens = None;
790 let mut completion_tokens = None;
791 let mut total_tokens = None;
792 let mut stop_reason = None;
793
794 for line in body.lines() {
795 let line = line.trim();
796 let Some(data) = line.strip_prefix("data:") else {
797 continue;
798 };
799 let data = data.trim();
800 if data.is_empty() {
801 continue;
802 }
803 if data == "[DONE]" {
804 break;
805 }
806
807 let parsed = parse_json(data).map_err(|err| {
808 RedDBError::Query(format!(
809 "invalid OpenAI streaming prompt JSON response: {err}"
810 ))
811 })?;
812 let json = JsonValue::from(parsed);
813 if let Some(value) = json.get("model").and_then(JsonValue::as_str) {
814 model = value.to_string();
815 }
816 if let Some(usage) = json.get("usage") {
817 prompt_tokens = usage
818 .get("prompt_tokens")
819 .and_then(JsonValue::as_i64)
820 .and_then(|value| u64::try_from(value).ok())
821 .or(prompt_tokens);
822 completion_tokens = usage
823 .get("completion_tokens")
824 .and_then(JsonValue::as_i64)
825 .and_then(|value| u64::try_from(value).ok())
826 .or(completion_tokens);
827 total_tokens = usage
828 .get("total_tokens")
829 .and_then(JsonValue::as_i64)
830 .and_then(|value| u64::try_from(value).ok())
831 .or(total_tokens);
832 }
833
834 let Some(choices) = json.get("choices").and_then(JsonValue::as_array) else {
835 continue;
836 };
837 let Some(first_choice) = choices.first() else {
838 continue;
839 };
840 if let Some(reason) = first_choice
841 .get("finish_reason")
842 .and_then(JsonValue::as_str)
843 {
844 stop_reason = Some(reason.to_string());
845 }
846 if let Some(text) = first_choice
847 .get("delta")
848 .and_then(|delta| delta.get("content"))
849 .and_then(JsonValue::as_str)
850 {
851 if !text.is_empty() {
852 chunks.push(text.to_string());
853 }
854 }
855 }
856
857 if chunks.is_empty() {
858 return Err(RedDBError::Query(
859 "OpenAI streaming prompt response missing text content".to_string(),
860 ));
861 }
862
863 let output_text = chunks.concat();
864 let total_tokens = total_tokens.or_else(|| match (prompt_tokens, completion_tokens) {
865 (Some(prompt), Some(completion)) => Some(prompt.saturating_add(completion)),
866 _ => None,
867 });
868
869 Ok(AiPromptResponse {
870 provider: "openai",
871 model,
872 output_text,
873 output_chunks: Some(chunks),
874 prompt_tokens,
875 completion_tokens,
876 total_tokens,
877 stop_reason,
878 })
879}
880
881fn parse_anthropic_prompt_response(
882 body: &str,
883 requested_model: &str,
884) -> RedDBResult<AiPromptResponse> {
885 let parsed = parse_json(body).map_err(|err| {
886 RedDBError::Query(format!("invalid Anthropic prompt JSON response: {err}"))
887 })?;
888 let json = JsonValue::from(parsed);
889
890 let model = json
891 .get("model")
892 .and_then(JsonValue::as_str)
893 .unwrap_or(requested_model)
894 .to_string();
895
896 let Some(content_parts) = json.get("content").and_then(JsonValue::as_array) else {
897 return Err(RedDBError::Query(
898 "Anthropic prompt response missing 'content' array".to_string(),
899 ));
900 };
901
902 let output_text = extract_text_from_parts(content_parts).ok_or_else(|| {
903 RedDBError::Query("Anthropic prompt response missing text content".to_string())
904 })?;
905
906 let prompt_tokens = json
907 .get("usage")
908 .and_then(|usage| usage.get("input_tokens"))
909 .and_then(JsonValue::as_i64)
910 .and_then(|value| u64::try_from(value).ok());
911 let completion_tokens = json
912 .get("usage")
913 .and_then(|usage| usage.get("output_tokens"))
914 .and_then(JsonValue::as_i64)
915 .and_then(|value| u64::try_from(value).ok());
916 let total_tokens = match (prompt_tokens, completion_tokens) {
917 (Some(prompt), Some(completion)) => Some(prompt.saturating_add(completion)),
918 _ => None,
919 };
920
921 let stop_reason = json
922 .get("stop_reason")
923 .and_then(JsonValue::as_str)
924 .map(str::to_string);
925
926 Ok(AiPromptResponse {
927 provider: "anthropic",
928 model,
929 output_text,
930 output_chunks: None,
931 prompt_tokens,
932 completion_tokens,
933 total_tokens,
934 stop_reason,
935 })
936}
937
938fn parse_openai_embedding_response(body: &str) -> RedDBResult<OpenAiEmbeddingResponse> {
939 let parsed = parse_json(body).map_err(|err| {
940 RedDBError::Query(format!("invalid OpenAI embeddings JSON response: {err}"))
941 })?;
942 let json = JsonValue::from(parsed);
943
944 let model = json
945 .get("model")
946 .and_then(JsonValue::as_str)
947 .unwrap_or(DEFAULT_OPENAI_EMBEDDING_MODEL)
948 .to_string();
949
950 let Some(data) = json.get("data").and_then(JsonValue::as_array) else {
951 return Err(RedDBError::Query(
952 "OpenAI response missing 'data' array".to_string(),
953 ));
954 };
955
956 let mut rows: Vec<(usize, Vec<f32>)> = Vec::with_capacity(data.len());
957 for (position, item) in data.iter().enumerate() {
958 let index = item
959 .get("index")
960 .and_then(JsonValue::as_i64)
961 .and_then(|value| usize::try_from(value).ok())
962 .unwrap_or(position);
963
964 let Some(embedding_values) = item.get("embedding").and_then(JsonValue::as_array) else {
965 return Err(RedDBError::Query(
966 "OpenAI response contains item without 'embedding' array".to_string(),
967 ));
968 };
969 if embedding_values.is_empty() {
970 return Err(RedDBError::Query(
971 "OpenAI response contains empty embedding vector".to_string(),
972 ));
973 }
974
975 let mut embedding = Vec::with_capacity(embedding_values.len());
976 for value in embedding_values {
977 let Some(number) = value.as_f64() else {
978 return Err(RedDBError::Query(
979 "OpenAI response contains non-numeric embedding value".to_string(),
980 ));
981 };
982 embedding.push(number as f32);
983 }
984 rows.push((index, embedding));
985 }
986 rows.sort_by_key(|(index, _)| *index);
987 let embeddings = rows.into_iter().map(|(_, embedding)| embedding).collect();
988
989 let prompt_tokens = json
990 .get("usage")
991 .and_then(|usage| usage.get("prompt_tokens"))
992 .and_then(JsonValue::as_i64)
993 .and_then(|value| u64::try_from(value).ok());
994 let total_tokens = json
995 .get("usage")
996 .and_then(|usage| usage.get("total_tokens"))
997 .and_then(JsonValue::as_i64)
998 .and_then(|value| u64::try_from(value).ok());
999
1000 Ok(OpenAiEmbeddingResponse {
1001 provider: "openai",
1002 model,
1003 embeddings,
1004 prompt_tokens,
1005 total_tokens,
1006 })
1007}
1008
1009#[cfg(test)]
1010mod tests {
1011 use super::*;
1012
1013 #[test]
1014 fn parse_openai_embedding_response_extracts_vectors() {
1015 let body = r#"{
1016 "object":"list",
1017 "data":[
1018 {"object":"embedding","index":1,"embedding":[0.3,0.4]},
1019 {"object":"embedding","index":0,"embedding":[0.1,0.2]}
1020 ],
1021 "model":"text-embedding-3-small",
1022 "usage":{"prompt_tokens":12,"total_tokens":12}
1023 }"#;
1024
1025 let result = parse_openai_embedding_response(body).expect("response should parse");
1026 assert_eq!(result.provider, "openai");
1027 assert_eq!(result.model, "text-embedding-3-small");
1028 assert_eq!(result.embeddings.len(), 2);
1029 assert_eq!(result.embeddings[0], vec![0.1, 0.2]);
1030 assert_eq!(result.embeddings[1], vec![0.3, 0.4]);
1031 assert_eq!(result.prompt_tokens, Some(12));
1032 assert_eq!(result.total_tokens, Some(12));
1033 }
1034
1035 #[test]
1036 fn openai_error_message_extracts_nested_message() {
1037 let body = r#"{"error":{"message":"bad api key","type":"invalid_request_error"}}"#;
1038 assert_eq!(openai_error_message(body).as_deref(), Some("bad api key"));
1039 }
1040
1041 #[test]
1042 fn parse_openai_prompt_response_extracts_text_and_usage() {
1043 let body = r#"{
1044 "id":"chatcmpl_1",
1045 "object":"chat.completion",
1046 "model":"gpt-4.1-mini",
1047 "choices":[
1048 {
1049 "index":0,
1050 "finish_reason":"stop",
1051 "message":{"role":"assistant","content":"Resumo pronto."}
1052 }
1053 ],
1054 "usage":{"prompt_tokens":10,"completion_tokens":4,"total_tokens":14}
1055 }"#;
1056
1057 let parsed =
1058 parse_openai_prompt_response(body, DEFAULT_OPENAI_PROMPT_MODEL).expect("parse");
1059 assert_eq!(parsed.provider, "openai");
1060 assert_eq!(parsed.model, "gpt-4.1-mini");
1061 assert_eq!(parsed.output_text, "Resumo pronto.");
1062 assert_eq!(parsed.prompt_tokens, Some(10));
1063 assert_eq!(parsed.completion_tokens, Some(4));
1064 assert_eq!(parsed.total_tokens, Some(14));
1065 assert_eq!(parsed.stop_reason.as_deref(), Some("stop"));
1066 }
1067
1068 #[test]
1069 fn parse_anthropic_prompt_response_extracts_text_and_usage() {
1070 let body = r#"{
1071 "id":"msg_1",
1072 "model":"claude-3-5-haiku-latest",
1073 "type":"message",
1074 "content":[{"type":"text","text":"Action complete."}],
1075 "usage":{"input_tokens":11,"output_tokens":5},
1076 "stop_reason":"end_turn"
1077 }"#;
1078
1079 let parsed =
1080 parse_anthropic_prompt_response(body, DEFAULT_ANTHROPIC_PROMPT_MODEL).expect("parse");
1081 assert_eq!(parsed.provider, "anthropic");
1082 assert_eq!(parsed.model, "claude-3-5-haiku-latest");
1083 assert_eq!(parsed.output_text, "Action complete.");
1084 assert_eq!(parsed.prompt_tokens, Some(11));
1085 assert_eq!(parsed.completion_tokens, Some(5));
1086 assert_eq!(parsed.total_tokens, Some(16));
1087 assert_eq!(parsed.stop_reason.as_deref(), Some("end_turn"));
1088 }
1089
1090 #[test]
1091 fn resolve_api_key_prefers_vault_secret_over_legacy_config() {
1092 let provider = AiProvider::OpenAi;
1093 let alias = "vault_unit_alias";
1094 let secret_path = ai_api_secret_path(&provider, alias);
1095 let legacy_key = ai_api_legacy_config_key(&provider, alias);
1096
1097 let resolved = resolve_api_key(&provider, Some(alias), |key| {
1098 if key == secret_path {
1099 Ok(Some("vault-key".to_string()))
1100 } else if key == legacy_key {
1101 Ok(Some("legacy-key".to_string()))
1102 } else {
1103 Ok(None)
1104 }
1105 })
1106 .expect("resolve");
1107
1108 assert_eq!(resolved, "vault-key");
1109 }
1110
1111 #[test]
1112 fn resolve_api_key_uses_default_vault_secret_path() {
1113 let provider = AiProvider::OpenAi;
1114 let secret_path = ai_api_secret_path(&provider, "default");
1115
1116 let resolved = resolve_api_key(&provider, None, |key| {
1117 if key == secret_path {
1118 Ok(Some("default-vault-key".to_string()))
1119 } else {
1120 Ok(None)
1121 }
1122 })
1123 .expect("resolve");
1124
1125 assert_eq!(resolved, "default-vault-key");
1126 }
1127
1128 #[test]
1129 fn openai_prompt_payload_includes_temperature_and_seed_when_present() {
1130 let payload = build_openai_prompt_payload(
1131 "gpt-4.1-mini",
1132 "hello",
1133 Some(0.0),
1134 Some(42),
1135 Some(128),
1136 false,
1137 );
1138 let parsed = JsonValue::from(parse_json(&payload).expect("valid json"));
1139
1140 assert_eq!(
1141 parsed.get("temperature").and_then(JsonValue::as_f64),
1142 Some(0.0)
1143 );
1144 assert_eq!(parsed.get("seed").and_then(JsonValue::as_u64), Some(42));
1145 assert_eq!(
1146 parsed.get("max_tokens").and_then(JsonValue::as_u64),
1147 Some(128)
1148 );
1149 }
1150
1151 #[test]
1152 fn openai_prompt_payload_omits_seed_when_none() {
1153 let payload =
1154 build_openai_prompt_payload("gpt-4.1-mini", "hello", Some(0.0), None, None, false);
1155 let parsed = JsonValue::from(parse_json(&payload).expect("valid json"));
1156
1157 assert!(parsed.get("seed").is_none());
1158 assert!(parsed.get("stream").is_none());
1159 assert_eq!(
1160 parsed.get("temperature").and_then(JsonValue::as_f64),
1161 Some(0.0)
1162 );
1163 }
1164
1165 #[test]
1166 fn openai_prompt_payload_enables_stream_options() {
1167 let payload =
1168 build_openai_prompt_payload("gpt-4.1-mini", "hello", Some(0.0), None, None, true);
1169 let parsed = JsonValue::from(parse_json(&payload).expect("valid json"));
1170
1171 assert_eq!(
1172 parsed.get("stream").and_then(JsonValue::as_bool),
1173 Some(true)
1174 );
1175 assert_eq!(
1176 parsed
1177 .get("stream_options")
1178 .and_then(|value| value.get("include_usage"))
1179 .and_then(JsonValue::as_bool),
1180 Some(true)
1181 );
1182 }
1183
1184 #[test]
1185 fn openai_streaming_prompt_response_collects_delta_chunks() {
1186 let body = concat!(
1187 "data: {\"model\":\"gpt-test\",\"choices\":[{\"delta\":{\"content\":\"login \"},\"finish_reason\":null}]}\n\n",
1188 "data: {\"model\":\"gpt-test\",\"choices\":[{\"delta\":{\"content\":\"failed\"},\"finish_reason\":null}]}\n\n",
1189 "data: {\"model\":\"gpt-test\",\"choices\":[{\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":12,\"completion_tokens\":2,\"total_tokens\":14}}\n\n",
1190 "data: [DONE]\n\n",
1191 );
1192 let parsed = parse_openai_streaming_prompt_response(body, "fallback").unwrap();
1193
1194 assert_eq!(parsed.model, "gpt-test");
1195 assert_eq!(parsed.output_text, "login failed");
1196 assert_eq!(
1197 parsed.output_chunks.as_deref(),
1198 Some(["login ".to_string(), "failed".to_string()].as_slice())
1199 );
1200 assert_eq!(parsed.prompt_tokens, Some(12));
1201 assert_eq!(parsed.completion_tokens, Some(2));
1202 assert_eq!(parsed.total_tokens, Some(14));
1203 assert_eq!(parsed.stop_reason.as_deref(), Some("stop"));
1204 }
1205
1206 #[tokio::test]
1207 async fn openai_prompt_async_rejects_empty_model() {
1208 let transport = crate::runtime::ai::transport::AiTransport::new(Default::default());
1209 let request = OpenAiPromptRequest {
1210 api_key: "key".to_string(),
1211 model: " ".to_string(),
1212 prompt: "hello".to_string(),
1213 temperature: None,
1214 seed: None,
1215 max_output_tokens: None,
1216 api_base: "https://api.openai.com/v1".to_string(),
1217 stream: false,
1218 };
1219 let err = openai_prompt_async(&transport, request).await.unwrap_err();
1220 assert!(err.to_string().contains("model cannot be empty"));
1221 }
1222
1223 #[tokio::test]
1224 async fn openai_prompt_async_rejects_empty_prompt() {
1225 let transport = crate::runtime::ai::transport::AiTransport::new(Default::default());
1226 let request = OpenAiPromptRequest {
1227 api_key: "key".to_string(),
1228 model: "gpt-4.1-mini".to_string(),
1229 prompt: "".to_string(),
1230 temperature: None,
1231 seed: None,
1232 max_output_tokens: None,
1233 api_base: "https://api.openai.com/v1".to_string(),
1234 stream: false,
1235 };
1236 let err = openai_prompt_async(&transport, request).await.unwrap_err();
1237 assert!(err.to_string().contains("prompt cannot be empty"));
1238 }
1239
1240 use std::io::{Read as _, Write as _};
1250 use std::net::TcpListener;
1251 use std::sync::{Arc, Mutex};
1252 use std::thread;
1253
1254 struct CapturedRequest {
1255 method: String,
1256 path: String,
1257 headers: Vec<(String, String)>,
1258 body: String,
1259 }
1260
1261 fn parse_http_request(stream: &mut std::net::TcpStream) -> CapturedRequest {
1262 let mut buf = [0u8; 8192];
1263 let mut data = Vec::new();
1264 loop {
1265 let read = stream.read(&mut buf).unwrap_or(0);
1266 if read == 0 {
1267 break;
1268 }
1269 data.extend_from_slice(&buf[..read]);
1270 if let Some(idx) = data.windows(4).position(|w| w == b"\r\n\r\n") {
1271 let header_len = idx + 4;
1272 let header_str = String::from_utf8_lossy(&data[..idx]).to_string();
1273 let mut lines = header_str.split("\r\n");
1274 let request_line = lines.next().unwrap_or("");
1275 let mut parts = request_line.split_whitespace();
1276 let method = parts.next().unwrap_or("").to_string();
1277 let path = parts.next().unwrap_or("").to_string();
1278 let mut headers = Vec::new();
1279 let mut content_length: usize = 0;
1280 for line in lines {
1281 if let Some((k, v)) = line.split_once(':') {
1282 let k = k.trim().to_string();
1283 let v = v.trim().to_string();
1284 if k.eq_ignore_ascii_case("content-length") {
1285 content_length = v.parse().unwrap_or(0);
1286 }
1287 headers.push((k, v));
1288 }
1289 }
1290 while data.len() < header_len + content_length {
1291 let read = stream.read(&mut buf).unwrap_or(0);
1292 if read == 0 {
1293 break;
1294 }
1295 data.extend_from_slice(&buf[..read]);
1296 }
1297 let body = String::from_utf8_lossy(&data[header_len..header_len + content_length])
1298 .to_string();
1299 return CapturedRequest {
1300 method,
1301 path,
1302 headers,
1303 body,
1304 };
1305 }
1306 }
1307 CapturedRequest {
1308 method: String::new(),
1309 path: String::new(),
1310 headers: Vec::new(),
1311 body: String::new(),
1312 }
1313 }
1314
1315 fn spawn_mock(
1318 status: u16,
1319 response_body: &'static str,
1320 ) -> (String, Arc<Mutex<Option<CapturedRequest>>>) {
1321 let listener = TcpListener::bind("127.0.0.1:0").expect("bind");
1322 let addr = listener.local_addr().expect("addr");
1323 let captured: Arc<Mutex<Option<CapturedRequest>>> = Arc::new(Mutex::new(None));
1324 let captured_clone = Arc::clone(&captured);
1325 thread::spawn(move || {
1326 if let Ok((mut stream, _)) = listener.accept() {
1327 let req = parse_http_request(&mut stream);
1328 *captured_clone.lock().unwrap() = Some(req);
1329 let status_line = match status {
1330 200 => "200 OK",
1331 400 => "400 Bad Request",
1332 401 => "401 Unauthorized",
1333 500 => "500 Internal Server Error",
1334 _ => "200 OK",
1335 };
1336 let resp = format!(
1337 "HTTP/1.1 {status_line}\r\n\
1338 Content-Type: application/json\r\n\
1339 Content-Length: {}\r\n\
1340 Connection: close\r\n\r\n{}",
1341 response_body.len(),
1342 response_body
1343 );
1344 let _ = stream.write_all(resp.as_bytes());
1345 }
1346 });
1347 (format!("http://{}", addr), captured)
1348 }
1349
1350 #[test]
1351 fn openai_compat_chat_roundtrip_honors_arbitrary_api_base_and_headers() {
1352 let body = r#"{
1353 "id":"chatcmpl_x",
1354 "model":"custom-model",
1355 "choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","content":"hi"}}],
1356 "usage":{"prompt_tokens":7,"completion_tokens":2,"total_tokens":9}
1357 }"#;
1358 let (base, captured) = spawn_mock(200, body);
1359
1360 let req = OpenAiCompatChatRequest {
1361 api_base: base.clone(),
1362 api_key: "sk-test".to_string(),
1363 model: "custom-model".to_string(),
1364 prompt: "say hi".to_string(),
1365 temperature: None,
1366 seed: None,
1367 max_output_tokens: None,
1368 extra_headers: vec![("X-Custom-Tag".to_string(), "abc".to_string())],
1369 };
1370 let resp = openai_compat_chat(req).expect("ok");
1371
1372 assert_eq!(resp.output_text, "hi");
1373 assert_eq!(resp.model, "custom-model");
1374 assert_eq!(resp.usage.input_tokens, Some(7));
1375 assert_eq!(resp.usage.output_tokens, Some(2));
1376 assert_eq!(resp.usage.total_tokens, Some(9));
1377 assert_eq!(resp.stop_reason.as_deref(), Some("stop"));
1378
1379 let cap = captured.lock().unwrap().take().expect("captured");
1380 assert_eq!(cap.method, "POST");
1381 assert_eq!(cap.path, "/chat/completions");
1382 let has_auth = cap
1383 .headers
1384 .iter()
1385 .any(|(k, v)| k.eq_ignore_ascii_case("authorization") && v == "Bearer sk-test");
1386 assert!(has_auth, "Authorization header missing");
1387 let has_custom = cap
1388 .headers
1389 .iter()
1390 .any(|(k, v)| k.eq_ignore_ascii_case("x-custom-tag") && v == "abc");
1391 assert!(has_custom, "extra header missing");
1392 assert!(cap.body.contains("\"model\":\"custom-model\""));
1393 }
1394
1395 #[test]
1396 fn openai_compat_embeddings_roundtrip_with_dimensions() {
1397 let body = r#"{
1398 "object":"list",
1399 "model":"embed-model",
1400 "data":[{"object":"embedding","index":0,"embedding":[0.5,0.25]}],
1401 "usage":{"prompt_tokens":4,"total_tokens":4}
1402 }"#;
1403 let (base, captured) = spawn_mock(200, body);
1404
1405 let req = OpenAiCompatEmbeddingsRequest {
1406 api_base: base,
1407 api_key: "sk-emb".to_string(),
1408 model: "embed-model".to_string(),
1409 inputs: vec!["hello".to_string()],
1410 dimensions: Some(2),
1411 extra_headers: vec![],
1412 };
1413 let resp = openai_compat_embeddings(req).expect("ok");
1414
1415 assert_eq!(resp.embeddings.len(), 1);
1416 assert_eq!(resp.embeddings[0], vec![0.5_f32, 0.25_f32]);
1417 assert_eq!(resp.usage.total_tokens, Some(4));
1418 assert_eq!(resp.usage.input_tokens, Some(4));
1419
1420 let cap = captured.lock().unwrap().take().expect("captured");
1421 assert_eq!(cap.path, "/embeddings");
1422 assert!(cap.body.contains("\"dimensions\":2"));
1423 }
1424
1425 #[test]
1426 fn openai_compat_chat_non_2xx_returns_structured_error() {
1427 let body = r#"{"error":{"message":"bad api key","type":"invalid_request_error"}}"#;
1428 let (base, _captured) = spawn_mock(401, body);
1429
1430 let req = OpenAiCompatChatRequest {
1431 api_base: base,
1432 api_key: "bad".to_string(),
1433 model: "m".to_string(),
1434 prompt: "hi".to_string(),
1435 temperature: None,
1436 seed: None,
1437 max_output_tokens: None,
1438 extra_headers: vec![],
1439 };
1440 let err = openai_compat_chat(req).unwrap_err().to_string();
1441 assert!(err.contains("status 401"), "got: {err}");
1442 assert!(err.contains("bad api key"), "got: {err}");
1443 }
1444
1445 #[test]
1446 fn openai_compat_chat_rejects_empty_model_and_prompt() {
1447 let req = OpenAiCompatChatRequest {
1448 api_base: "http://localhost:1".to_string(),
1449 api_key: "k".to_string(),
1450 model: " ".to_string(),
1451 prompt: "hi".to_string(),
1452 temperature: None,
1453 seed: None,
1454 max_output_tokens: None,
1455 extra_headers: vec![],
1456 };
1457 let err = openai_compat_chat(req).unwrap_err().to_string();
1458 assert!(err.contains("model cannot be empty"), "got: {err}");
1459
1460 let req = OpenAiCompatChatRequest {
1461 api_base: "http://localhost:1".to_string(),
1462 api_key: "k".to_string(),
1463 model: "m".to_string(),
1464 prompt: " ".to_string(),
1465 temperature: None,
1466 seed: None,
1467 max_output_tokens: None,
1468 extra_headers: vec![],
1469 };
1470 let err = openai_compat_chat(req).unwrap_err().to_string();
1471 assert!(err.contains("prompt cannot be empty"), "got: {err}");
1472 }
1473
1474 #[test]
1475 fn parse_provider_mode_recognizes_all_three_tokens() {
1476 assert_eq!(
1477 parse_provider_mode("openai-compat"),
1478 Some(AiProviderMode::OpenAiCompat)
1479 );
1480 assert_eq!(
1481 parse_provider_mode("OPENAI_NATIVE"),
1482 Some(AiProviderMode::OpenAiNative)
1483 );
1484 assert_eq!(
1485 parse_provider_mode("anthropic-native"),
1486 Some(AiProviderMode::AnthropicNative)
1487 );
1488 assert_eq!(parse_provider_mode("groq"), None);
1489 }
1490
1491 #[test]
1492 fn resolve_provider_mode_reads_kv_key() {
1493 let kv = |key: &str| -> crate::RedDBResult<Option<String>> {
1494 if key == "red.config.ai.provider" {
1495 Ok(Some("anthropic-native".to_string()))
1496 } else {
1497 Ok(None)
1498 }
1499 };
1500 assert_eq!(
1501 resolve_provider_mode(&kv),
1502 Some(AiProviderMode::AnthropicNative)
1503 );
1504 }
1505
1506 #[test]
1507 fn resolve_default_provider_honors_mode_key() {
1508 let kv = |key: &str| -> crate::RedDBResult<Option<String>> {
1509 match key {
1510 "red.config.ai.provider" => Ok(Some("anthropic-native".to_string())),
1511 "red.config.ai.default.provider" => Ok(Some("groq".to_string())),
1512 _ => Ok(None),
1513 }
1514 };
1515 assert_eq!(resolve_default_provider(&kv), AiProvider::Anthropic);
1516 }
1517
1518 #[tokio::test]
1519 async fn anthropic_prompt_async_rejects_empty_api_key() {
1520 let transport = crate::runtime::ai::transport::AiTransport::new(Default::default());
1521 let request = AnthropicPromptRequest {
1522 api_key: " ".to_string(),
1523 model: "claude-3-5-haiku-latest".to_string(),
1524 prompt: "hello".to_string(),
1525 temperature: None,
1526 max_output_tokens: None,
1527 api_base: "https://api.anthropic.com/v1".to_string(),
1528 anthropic_version: DEFAULT_ANTHROPIC_VERSION.to_string(),
1529 };
1530 let err = anthropic_prompt_async(&transport, request)
1531 .await
1532 .unwrap_err();
1533 assert!(err.to_string().contains("API key cannot be empty"));
1534 }
1535}
1536
/// The AI backends this module knows how to address.
///
/// Each named variant has a fixed token, default models and default API base
/// (see the inherent methods on this type).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AiProvider {
    /// OpenAI.
    OpenAi,
    /// Anthropic.
    Anthropic,
    /// Groq.
    Groq,
    /// OpenRouter.
    OpenRouter,
    /// Together.
    Together,
    /// Venice.
    Venice,
    /// Ollama.
    Ollama,
    /// DeepSeek.
    DeepSeek,
    /// HuggingFace.
    HuggingFace,
    /// Local inference.
    Local,
    /// A user-supplied provider; the carried string is returned verbatim as
    /// both the token and the default API base.
    Custom(String),
}
1556
1557impl AiProvider {
1558 pub fn token(&self) -> &str {
1559 match self {
1560 Self::OpenAi => "openai",
1561 Self::Anthropic => "anthropic",
1562 Self::Groq => "groq",
1563 Self::OpenRouter => "openrouter",
1564 Self::Together => "together",
1565 Self::Venice => "venice",
1566 Self::Ollama => "ollama",
1567 Self::DeepSeek => "deepseek",
1568 Self::HuggingFace => "huggingface",
1569 Self::Local => "local",
1570 Self::Custom(name) => name.as_str(),
1571 }
1572 }
1573
1574 pub fn default_prompt_model(&self) -> &str {
1575 match self {
1576 Self::OpenAi => DEFAULT_OPENAI_PROMPT_MODEL,
1577 Self::Anthropic => DEFAULT_ANTHROPIC_PROMPT_MODEL,
1578 Self::Groq => "llama-3.3-70b-versatile",
1579 Self::OpenRouter => "auto",
1580 Self::Together => "meta-llama/Meta-Llama-3-8B-Instruct",
1581 Self::Venice => "llama-3.3-70b",
1582 Self::Ollama => "llama3",
1583 Self::DeepSeek => "deepseek-chat",
1584 Self::HuggingFace => "mistralai/Mistral-7B-Instruct-v0.3",
1585 Self::Local => "sentence-transformers/all-MiniLM-L6-v2",
1586 Self::Custom(_) => DEFAULT_OPENAI_PROMPT_MODEL,
1587 }
1588 }
1589
1590 pub fn prompt_model_env_name(&self) -> String {
1591 format!("REDDB_{}_PROMPT_MODEL", self.token().to_ascii_uppercase())
1592 }
1593
1594 pub fn default_embedding_model(&self) -> &str {
1595 match self {
1596 Self::Ollama => "nomic-embed-text",
1597 Self::HuggingFace | Self::Local => "sentence-transformers/all-MiniLM-L6-v2",
1598 _ => DEFAULT_OPENAI_EMBEDDING_MODEL,
1599 }
1600 }
1601
1602 pub fn default_api_base(&self) -> &str {
1603 match self {
1604 Self::OpenAi => DEFAULT_OPENAI_API_BASE,
1605 Self::Anthropic => DEFAULT_ANTHROPIC_API_BASE,
1606 Self::Groq => "https://api.groq.com/openai/v1",
1607 Self::OpenRouter => "https://openrouter.ai/api/v1",
1608 Self::Together => "https://api.together.xyz/v1",
1609 Self::Venice => "https://api.venice.ai/api/v1",
1610 Self::Ollama => "http://localhost:11434/v1",
1611 Self::DeepSeek => "https://api.deepseek.com/v1",
1612 Self::HuggingFace => "https://api-inference.huggingface.co",
1613 Self::Local => "local",
1614 Self::Custom(base) => base.as_str(),
1615 }
1616 }
1617
1618 pub fn api_base_env_name(&self) -> String {
1619 format!("REDDB_{}_API_BASE", self.token().to_ascii_uppercase())
1620 }
1621
1622 pub fn default_key_env_name(&self) -> String {
1623 format!("REDDB_{}_API_KEY", self.token().to_ascii_uppercase())
1624 }
1625
1626 pub fn alias_key_env_name(&self, alias: &str) -> String {
1627 let normalized = normalize_alias_token(alias);
1628 format!(
1629 "REDDB_{}_API_KEY_{normalized}",
1630 self.token().to_ascii_uppercase()
1631 )
1632 }
1633
1634 pub fn resolve_api_base(&self) -> String {
1635 if let Ok(value) = std::env::var(self.api_base_env_name()) {
1636 let value = value.trim().to_string();
1637 if !value.is_empty() {
1638 return value;
1639 }
1640 }
1641 self.default_api_base().to_string()
1642 }
1643
1644 pub fn resolve_api_base_with_kv<F>(&self, alias: &str, kv_getter: &F) -> String
1646 where
1647 F: Fn(&str) -> crate::RedDBResult<Option<String>>,
1648 {
1649 if let Ok(value) = std::env::var(self.api_base_env_name()) {
1651 let value = value.trim().to_string();
1652 if !value.is_empty() {
1653 return value;
1654 }
1655 }
1656 let kv_key = format!("red.config.ai.{}.{alias}.base_url", self.token());
1658 if let Ok(Some(value)) = kv_getter(&kv_key) {
1659 let value = value.trim().to_string();
1660 if !value.is_empty() {
1661 return value;
1662 }
1663 }
1664 self.default_api_base().to_string()
1665 }
1666
1667 pub fn is_openai_compatible(&self) -> bool {
1669 matches!(
1670 self,
1671 Self::OpenAi
1672 | Self::Groq
1673 | Self::OpenRouter
1674 | Self::Together
1675 | Self::Venice
1676 | Self::Ollama
1677 | Self::DeepSeek
1678 | Self::Custom(_)
1679 )
1680 }
1681
1682 pub fn requires_api_key(&self) -> bool {
1684 !matches!(self, Self::Ollama | Self::Local)
1685 }
1686}
1687
1688pub fn parse_provider(name: &str) -> crate::RedDBResult<AiProvider> {
1690 match name.trim().to_ascii_lowercase().as_str() {
1691 "openai" => Ok(AiProvider::OpenAi),
1692 "anthropic" => Ok(AiProvider::Anthropic),
1693 "groq" => Ok(AiProvider::Groq),
1694 "openrouter" | "open_router" => Ok(AiProvider::OpenRouter),
1695 "together" => Ok(AiProvider::Together),
1696 "venice" => Ok(AiProvider::Venice),
1697 "ollama" => Ok(AiProvider::Ollama),
1698 "deepseek" | "deep_seek" => Ok(AiProvider::DeepSeek),
1699 "huggingface" | "hf" => Ok(AiProvider::HuggingFace),
1700 "local" => Ok(AiProvider::Local),
1701 other => {
1702 if other.starts_with("http://") || other.starts_with("https://") {
1704 Ok(AiProvider::Custom(other.to_string()))
1705 } else {
1706 Err(crate::RedDBError::Query(format!(
1707 "unsupported AI provider '{other}'; expected: openai, anthropic, groq, \
1708 openrouter, together, venice, ollama, deepseek, huggingface, local"
1709 )))
1710 }
1711 }
1712 }
1713}
1714
1715pub fn resolve_default_provider<F>(kv_getter: &F) -> AiProvider
1720where
1721 F: Fn(&str) -> crate::RedDBResult<Option<String>>,
1722{
1723 if let Some(mode) = resolve_provider_mode(kv_getter) {
1726 return provider_mode_to_provider(mode);
1727 }
1728 if let Ok(value) = std::env::var("REDDB_AI_PROVIDER") {
1730 let value = value.trim().to_string();
1731 if !value.is_empty() {
1732 if let Ok(provider) = parse_provider(&value) {
1733 return provider;
1734 }
1735 }
1736 }
1737 if let Ok(Some(value)) = kv_getter("red.config.ai.default.provider") {
1739 let value = value.trim().to_string();
1740 if !value.is_empty() {
1741 if let Ok(provider) = parse_provider(&value) {
1742 return provider;
1743 }
1744 }
1745 }
1746 AiProvider::OpenAi
1747}
1748
1749pub fn resolve_default_model<F>(provider: &AiProvider, kv_getter: &F) -> String
1754where
1755 F: Fn(&str) -> crate::RedDBResult<Option<String>>,
1756{
1757 if let Ok(value) = std::env::var("REDDB_AI_MODEL") {
1759 let value = value.trim().to_string();
1760 if !value.is_empty() {
1761 return value;
1762 }
1763 }
1764 if let Ok(value) = std::env::var(provider.prompt_model_env_name()) {
1766 let value = value.trim().to_string();
1767 if !value.is_empty() {
1768 return value;
1769 }
1770 }
1771 if let Ok(Some(value)) = kv_getter("red.config.ai.default.model") {
1773 let value = value.trim().to_string();
1774 if !value.is_empty() {
1775 return value;
1776 }
1777 }
1778 provider.default_prompt_model().to_string()
1779}
1780
1781pub fn resolve_defaults_from_runtime(
1783 runtime: &crate::runtime::RedDBRuntime,
1784) -> (AiProvider, String) {
1785 use crate::application::ports::RuntimeEntityPort;
1786 let kv_getter = |key: &str| -> crate::RedDBResult<Option<String>> {
1787 match runtime.get_kv("red_config", key)? {
1788 Some((crate::storage::schema::Value::Text(s), _)) => Ok(Some(s.to_string())),
1789 _ => Ok(None),
1790 }
1791 };
1792 let provider = resolve_default_provider(&kv_getter);
1793 let model = resolve_default_model(&provider, &kv_getter);
1794 (provider, model)
1795}
1796
1797pub fn resolve_defaults_from_runtime_port<
1799 P: crate::application::ports::RuntimeEntityPort + ?Sized,
1800>(
1801 runtime: &P,
1802) -> (AiProvider, String) {
1803 let kv_getter = |key: &str| -> crate::RedDBResult<Option<String>> {
1804 match runtime.get_kv("red_config", key)? {
1805 Some((crate::storage::schema::Value::Text(s), _)) => Ok(Some(s.to_string())),
1806 _ => Ok(None),
1807 }
1808 };
1809 let provider = resolve_default_provider(&kv_getter);
1810 let model = resolve_default_model(&provider, &kv_getter);
1811 (provider, model)
1812}
1813
1814pub fn resolve_api_key<F>(
1823 provider: &AiProvider,
1824 credential_alias: Option<&str>,
1825 kv_getter: F,
1826) -> crate::RedDBResult<String>
1827where
1828 F: Fn(&str) -> crate::RedDBResult<Option<String>>,
1829{
1830 if !provider.requires_api_key() {
1832 if let Ok(value) = std::env::var(provider.default_key_env_name()) {
1834 let value = value.trim().to_string();
1835 if !value.is_empty() {
1836 return Ok(value);
1837 }
1838 }
1839 return Ok(String::new());
1840 }
1841
1842 if let Some(alias) = credential_alias.map(str::trim).filter(|a| !a.is_empty()) {
1843 if let Some(key) = resolve_key_from_env_alias(provider, alias) {
1845 return Ok(key);
1846 }
1847 if let Some(key) = kv_getter(&ai_api_secret_path(provider, alias))? {
1848 if !key.trim().is_empty() {
1849 return Ok(key);
1850 }
1851 }
1852 if let Some(secret_ref) = kv_getter(&ai_api_secret_ref_config_key(provider, alias))? {
1853 if let Some(key) = kv_getter(secret_ref.trim())? {
1854 if !key.trim().is_empty() {
1855 return Ok(key);
1856 }
1857 }
1858 }
1859 let legacy_key = ai_api_legacy_config_key(provider, alias);
1860 if let Some(key) = kv_getter(&legacy_key)? {
1861 if !key.trim().is_empty() {
1862 return Ok(key);
1863 }
1864 }
1865 return Err(crate::RedDBError::Query(format!(
1866 "credential '{alias}' not found for {}. Set env {} or store it in the vault",
1867 provider.token(),
1868 provider.alias_key_env_name(alias)
1869 )));
1870 }
1871
1872 if let Ok(value) = std::env::var(provider.default_key_env_name()) {
1874 let value = value.trim().to_string();
1875 if !value.is_empty() {
1876 return Ok(value);
1877 }
1878 }
1879
1880 if let Some(key) = kv_getter(&ai_api_secret_path(provider, "default"))? {
1881 if !key.trim().is_empty() {
1882 return Ok(key);
1883 }
1884 }
1885 if let Some(secret_ref) = kv_getter(&ai_api_secret_ref_config_key(provider, "default"))? {
1886 if let Some(key) = kv_getter(secret_ref.trim())? {
1887 if !key.trim().is_empty() {
1888 return Ok(key);
1889 }
1890 }
1891 }
1892 if let Some(key) = kv_getter(&ai_api_legacy_config_key(provider, "default"))? {
1893 if !key.trim().is_empty() {
1894 return Ok(key);
1895 }
1896 }
1897
1898 let legacy_short_key = format!("{}/default", provider.token());
1899 if let Some(key) = kv_getter(&legacy_short_key)? {
1900 if !key.trim().is_empty() {
1901 return Ok(key);
1902 }
1903 }
1904
1905 Err(crate::RedDBError::Query(format!(
1906 "missing {} API key. Set {} or provide credential alias",
1907 provider.token(),
1908 provider.default_key_env_name()
1909 )))
1910}
1911
1912pub fn ai_api_secret_path(provider: &AiProvider, alias: &str) -> String {
1913 format!(
1914 "red.secret.ai.{}.{}.api_key",
1915 provider.token(),
1916 normalize_credential_alias_path(alias)
1917 )
1918}
1919
1920pub fn ai_api_secret_ref_config_key(provider: &AiProvider, alias: &str) -> String {
1921 format!(
1922 "red.config.ai.{}.{}.secret_ref",
1923 provider.token(),
1924 normalize_credential_alias_path(alias)
1925 )
1926}
1927
1928pub fn ai_api_legacy_config_key(provider: &AiProvider, alias: &str) -> String {
1929 format!(
1930 "red.config.ai.{}.{}.key",
1931 provider.token(),
1932 normalize_credential_alias_path(alias)
1933 )
1934}
1935
/// Turns a credential alias into its config-path segment: trimmed and
/// ASCII-lowercased, or `default` when the alias is blank.
fn normalize_credential_alias_path(alias: &str) -> String {
    match alias.trim() {
        "" => String::from("default"),
        trimmed => trimmed.to_ascii_lowercase(),
    }
}
1944
1945fn resolve_key_from_env_alias(provider: &AiProvider, alias: &str) -> Option<String> {
1946 let env_name = provider.alias_key_env_name(alias);
1947 std::env::var(env_name)
1948 .ok()
1949 .map(|v| v.trim().to_string())
1950 .filter(|v| !v.is_empty())
1951}
1952
/// Converts an alias into an env-var-safe token.
///
/// ASCII letters and digits are upper-cased; every other character acts as a
/// separator. Runs of separators collapse into a single `_`, and separators
/// at either end are dropped.
fn normalize_alias_token(alias: &str) -> String {
    let mut token = String::with_capacity(alias.len());
    for ch in alias.chars() {
        if ch.is_ascii_alphanumeric() {
            token.push(ch.to_ascii_uppercase());
        } else if !token.is_empty() && !token.ends_with('_') {
            // First separator after real content; later ones in the same
            // run, and any leading ones, are skipped.
            token.push('_');
        }
    }
    // At most one trailing separator can have been written.
    if token.ends_with('_') {
        token.pop();
    }
    token
}
1967
1968pub fn resolve_api_key_from_runtime(
1970 provider: &AiProvider,
1971 credential_alias: Option<&str>,
1972 runtime: &crate::runtime::RedDBRuntime,
1973) -> crate::RedDBResult<String> {
1974 use crate::application::ports::RuntimeEntityPort;
1975 resolve_api_key(provider, credential_alias, |kv_key| {
1976 if kv_key.starts_with("red.secret.") {
1977 return Ok(runtime.vault_kv_get(kv_key));
1978 }
1979 match runtime.get_kv("red_config", kv_key)? {
1980 Some((crate::storage::schema::Value::Text(secret), _)) => Ok(Some(secret.to_string())),
1981 Some(_) => Ok(None),
1982 None => Ok(None),
1983 }
1984 })
1985}
1986
1987pub fn huggingface_embeddings(
1993 api_key: &str,
1994 model: &str,
1995 inputs: &[String],
1996 api_base: &str,
1997) -> crate::RedDBResult<OpenAiEmbeddingResponse> {
1998 let url = format!("{api_base}/pipeline/feature-extraction/{model}");
1999 let mut embeddings = Vec::with_capacity(inputs.len());
2000
2001 for input in inputs {
2002 let payload = crate::serde_json::json!({ "inputs": input }).to_string_compact();
2003 let (status, body_str) = http_post_json(&url, api_key, &[], payload, 90)
2004 .map_err(|e| crate::RedDBError::Query(format!("HuggingFace API error: {e}")))?;
2005 if !(200..300).contains(&status) {
2006 return Err(crate::RedDBError::Query(format!(
2007 "HuggingFace API error (status {status}): {body_str}"
2008 )));
2009 }
2010 let body: JsonValue = crate::serde_json::from_str(&body_str).map_err(|e| {
2011 crate::RedDBError::Query(format!("HuggingFace response parse error: {e}"))
2012 })?;
2013
2014 let vector: Vec<f32> = match &body {
2016 JsonValue::Array(outer) => outer
2017 .iter()
2018 .filter_map(|v| v.as_f64().map(|n| n as f32))
2019 .collect(),
2020 _ => {
2021 return Err(crate::RedDBError::Query(
2022 "unexpected HuggingFace embedding response format".to_string(),
2023 ))
2024 }
2025 };
2026 embeddings.push(vector);
2027 }
2028
2029 Ok(OpenAiEmbeddingResponse {
2030 provider: "huggingface",
2031 model: model.to_string(),
2032 embeddings,
2033 prompt_tokens: None,
2034 total_tokens: None,
2035 })
2036}
2037
2038pub fn huggingface_prompt(
2040 api_key: &str,
2041 model: &str,
2042 prompt: &str,
2043 temperature: Option<f32>,
2044 max_tokens: Option<usize>,
2045 api_base: &str,
2046) -> crate::RedDBResult<AiPromptResponse> {
2047 let url = format!("{api_base}/models/{model}");
2048 let mut params = Map::new();
2049 if let Some(t) = temperature {
2050 params.insert("temperature".into(), JsonValue::Number(t as f64));
2051 }
2052 params.insert(
2053 "max_new_tokens".into(),
2054 JsonValue::Number(max_tokens.unwrap_or(512) as f64),
2055 );
2056 let payload = crate::serde_json::json!({
2057 "inputs": prompt,
2058 "parameters": JsonValue::Object(params)
2059 });
2060
2061 let (status, body_str) =
2062 http_post_json(&url, api_key, &[], payload.to_string_compact(), 120)
2063 .map_err(|e| crate::RedDBError::Query(format!("HuggingFace API error: {e}")))?;
2064 if !(200..300).contains(&status) {
2065 return Err(crate::RedDBError::Query(format!(
2066 "HuggingFace API error (status {status}): {body_str}"
2067 )));
2068 }
2069 let body: JsonValue = crate::serde_json::from_str(&body_str)
2070 .map_err(|e| crate::RedDBError::Query(format!("HuggingFace response parse error: {e}")))?;
2071
2072 let output_text = match &body {
2073 JsonValue::Array(arr) => arr
2074 .first()
2075 .and_then(|v| v.get("generated_text"))
2076 .and_then(JsonValue::as_str)
2077 .unwrap_or("")
2078 .to_string(),
2079 _ => body
2080 .get("generated_text")
2081 .and_then(JsonValue::as_str)
2082 .unwrap_or("")
2083 .to_string(),
2084 };
2085
2086 Ok(AiPromptResponse {
2087 provider: "huggingface",
2088 model: model.to_string(),
2089 output_text,
2090 output_chunks: None,
2091 prompt_tokens: None,
2092 completion_tokens: None,
2093 total_tokens: None,
2094 stop_reason: None,
2095 })
2096}
2097
2098pub fn local_embeddings(
2104 _model_id: &str,
2105 _texts: &[String],
2106) -> crate::RedDBResult<OpenAiEmbeddingResponse> {
2107 Err(crate::RedDBError::FeatureNotEnabled(
2108 "local model inference requires the 'local-models' feature flag. \
2109 Build with: cargo build --features local-models. \
2110 Alternatively, use 'ollama' provider with a local Ollama server."
2111 .to_string(),
2112 ))
2113}
2114
2115pub fn local_prompt(_model_id: &str, _prompt: &str) -> crate::RedDBResult<AiPromptResponse> {
2117 Err(crate::RedDBError::FeatureNotEnabled(
2118 "local model inference requires the 'local-models' feature flag. \
2119 Build with: cargo build --features local-models. \
2120 Alternatively, use 'ollama' provider with a local Ollama server."
2121 .to_string(),
2122 ))
2123}
2124
2125fn grpc_collect_embedding_inputs(
2138 runtime: &crate::runtime::RedDBRuntime,
2139 payload: &JsonValue,
2140) -> crate::RedDBResult<Vec<String>> {
2141 if let Some(source_query) = payload
2142 .get("source_query")
2143 .and_then(|v| v.as_str())
2144 .map(str::trim)
2145 .filter(|s| !s.is_empty())
2146 {
2147 return grpc_collect_inputs_from_source_query(runtime, payload, source_query);
2148 }
2149
2150 if let Some(arr) = payload.get("inputs").and_then(|v| v.as_array()) {
2151 let mut out = Vec::with_capacity(arr.len());
2152 for (idx, v) in arr.iter().enumerate() {
2153 let text = v.as_str().ok_or_else(|| {
2154 crate::RedDBError::Query(format!("field 'inputs[{idx}]' must be a string"))
2155 })?;
2156 if text.trim().is_empty() {
2157 return Err(crate::RedDBError::Query(format!(
2158 "field 'inputs[{idx}]' cannot be empty"
2159 )));
2160 }
2161 out.push(text.to_string());
2162 }
2163 if out.is_empty() {
2164 return Err(crate::RedDBError::Query(
2165 "field 'inputs' must be a non-empty array of strings".to_string(),
2166 ));
2167 }
2168 return Ok(out);
2169 }
2170
2171 if let Some(single) = payload
2172 .get("input")
2173 .and_then(|v| v.as_str())
2174 .map(str::trim)
2175 .filter(|s| !s.is_empty())
2176 {
2177 return Ok(vec![single.to_string()]);
2178 }
2179
2180 Err(crate::RedDBError::Query(
2181 "provide either 'input', 'inputs', or 'source_query'".to_string(),
2182 ))
2183}
2184
2185fn grpc_collect_inputs_from_source_query(
2186 runtime: &crate::runtime::RedDBRuntime,
2187 payload: &JsonValue,
2188 source_query: &str,
2189) -> crate::RedDBResult<Vec<String>> {
2190 let result = runtime
2191 .execute_query(source_query)
2192 .map_err(|err| crate::RedDBError::Query(format!("source_query failed: {err}")))?;
2193
2194 let source_mode = payload
2195 .get("source_mode")
2196 .and_then(|v| v.as_str())
2197 .map(str::trim)
2198 .filter(|s| !s.is_empty())
2199 .unwrap_or("row")
2200 .to_ascii_lowercase();
2201
2202 let mut out: Vec<String> = Vec::new();
2203 match source_mode.as_str() {
2204 "row" => {
2205 let field = payload
2206 .get("source_field")
2207 .and_then(|v| v.as_str())
2208 .map(str::trim)
2209 .filter(|s| !s.is_empty())
2210 .ok_or_else(|| {
2211 crate::RedDBError::Query(
2212 "field 'source_field' is required when source_mode='row'".to_string(),
2213 )
2214 })?;
2215 for rec in &result.result.records {
2216 for (key, value) in rec.iter_fields() {
2217 if key.as_ref() == field {
2218 if let crate::storage::schema::Value::Text(text) = value {
2219 let trimmed = text.trim();
2220 if !trimmed.is_empty() {
2221 out.push(trimmed.to_string());
2222 }
2223 }
2224 }
2225 }
2226 }
2227 }
2228 "result" => {
2229 for rec in &result.result.records {
2230 for (_, value) in rec.iter_fields() {
2231 if let crate::storage::schema::Value::Text(text) = value {
2232 let trimmed = text.trim();
2233 if !trimmed.is_empty() {
2234 out.push(trimmed.to_string());
2235 }
2236 }
2237 }
2238 }
2239 }
2240 other => {
2241 return Err(crate::RedDBError::Query(format!(
2242 "field 'source_mode' must be 'row' or 'result' (got '{other}')"
2243 )));
2244 }
2245 }
2246
2247 if out.is_empty() {
2248 return Err(crate::RedDBError::Query(
2249 "source_query produced zero non-empty text inputs".to_string(),
2250 ));
2251 }
2252 Ok(out)
2253}
2254
2255pub fn grpc_embeddings(
2277 runtime: &crate::runtime::RedDBRuntime,
2278 payload: &JsonValue,
2279) -> crate::RedDBResult<JsonValue> {
2280 let provider_name = payload
2281 .get("provider")
2282 .and_then(|v| v.as_str())
2283 .map(str::trim)
2284 .filter(|s| !s.is_empty())
2285 .unwrap_or("openai");
2286 let provider = parse_provider(provider_name)?;
2287 match &provider {
2292 AiProvider::Anthropic => {
2293 return Err(crate::RedDBError::Query(
2294 "Anthropic does not offer an embeddings API. \
2295 Re-issue the request against an OpenAI-compatible \
2296 provider (openai, groq, ollama, openrouter, together, \
2297 venice, deepseek), HuggingFace, or a custom base URL — \
2298 RedDB does not silently route embeddings to a \
2299 different provider than the one you named."
2300 .to_string(),
2301 ));
2302 }
2303 AiProvider::Local => {
2304 return Err(crate::RedDBError::Query(
2305 "Local embeddings require the `local-models` feature \
2306 flag at engine build time."
2307 .to_string(),
2308 ));
2309 }
2310 _ => {}
2311 }
2312
2313 let inputs: Vec<String> = grpc_collect_embedding_inputs(runtime, payload)?;
2314
2315 let model = payload
2316 .get("model")
2317 .and_then(|v| v.as_str())
2318 .map(str::trim)
2319 .filter(|s| !s.is_empty())
2320 .map(str::to_string)
2321 .or_else(|| {
2322 std::env::var(format!(
2323 "REDDB_{}_EMBEDDING_MODEL",
2324 provider.token().to_ascii_uppercase()
2325 ))
2326 .ok()
2327 })
2328 .or_else(|| std::env::var("REDDB_OPENAI_EMBEDDING_MODEL").ok())
2329 .filter(|v| !v.trim().is_empty())
2330 .unwrap_or_else(|| provider.default_embedding_model().to_string());
2331
2332 let credential = payload
2333 .get("credential")
2334 .and_then(|v| v.as_str())
2335 .map(str::to_string);
2336 let api_key = resolve_api_key_from_runtime(&provider, credential.as_deref(), runtime)?;
2337
2338 let dimensions = payload
2339 .get("dimensions")
2340 .and_then(|v| v.as_i64())
2341 .and_then(|v| usize::try_from(v).ok())
2342 .filter(|v| *v > 0);
2343
2344 let response = match &provider {
2345 AiProvider::HuggingFace => {
2346 huggingface_embeddings(&api_key, &model, &inputs, &provider.resolve_api_base())?
2347 }
2348 _ => {
2349 let transport = crate::runtime::ai::transport::AiTransport::from_runtime(runtime);
2350 let request = OpenAiEmbeddingRequest {
2351 api_key,
2352 model,
2353 inputs,
2354 dimensions,
2355 api_base: provider.resolve_api_base(),
2356 };
2357 crate::runtime::ai::block_on_ai(async move {
2358 openai_embeddings_async(&transport, request).await
2359 })
2360 .and_then(|result| result)?
2361 }
2362 };
2363
2364 let embeddings_json: Vec<JsonValue> = response
2365 .embeddings
2366 .into_iter()
2367 .map(|vec| {
2368 JsonValue::Array(
2369 vec.into_iter()
2370 .map(|f| JsonValue::Number(f as f64))
2371 .collect(),
2372 )
2373 })
2374 .collect();
2375
2376 let mut obj = Map::new();
2377 obj.insert(
2378 "provider".to_string(),
2379 JsonValue::String(response.provider.to_string()),
2380 );
2381 obj.insert("model".to_string(), JsonValue::String(response.model));
2382 obj.insert("embeddings".to_string(), JsonValue::Array(embeddings_json));
2383 if let Some(pt) = response.prompt_tokens {
2384 obj.insert("prompt_tokens".to_string(), JsonValue::Number(pt as f64));
2385 }
2386 if let Some(tt) = response.total_tokens {
2387 obj.insert("total_tokens".to_string(), JsonValue::Number(tt as f64));
2388 }
2389 Ok(JsonValue::Object(obj))
2390}
2391
2392pub fn grpc_prompt(
2394 _runtime: &crate::runtime::RedDBRuntime,
2395 _payload: &JsonValue,
2396) -> crate::RedDBResult<JsonValue> {
2397 Err(crate::RedDBError::FeatureNotEnabled(
2398 "AI prompt via gRPC requires HTTP endpoint; use POST /ai/prompt".to_string(),
2399 ))
2400}
2401
2402pub fn grpc_credentials(
2404 _runtime: &crate::runtime::RedDBRuntime,
2405 _payload: &JsonValue,
2406) -> crate::RedDBResult<JsonValue> {
2407 Err(crate::RedDBError::FeatureNotEnabled(
2408 "AI credentials via gRPC requires HTTP endpoint; use POST /ai/credentials".to_string(),
2409 ))
2410}
2411
/// Token accounting attached to OpenAI-compatible chat and embeddings
/// responses. Every counter is optional; `Default` yields all `None`.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct OpenAiCompatUsage {
    /// Tokens consumed by the request input (taken from the parsed response's
    /// `prompt_tokens`).
    pub input_tokens: Option<u64>,
    /// Tokens produced by the model (the parsed `completion_tokens` for chat;
    /// `openai_compat_embeddings` always leaves this `None`).
    pub output_tokens: Option<u64>,
    /// Total token count as reported in the parsed response.
    pub total_tokens: Option<u64>,
}
2433
/// Input for [`openai_compat_chat`]: one prompt sent to an OpenAI-compatible
/// `/chat/completions` endpoint.
#[derive(Clone, Debug)]
pub struct OpenAiCompatChatRequest {
    /// Base URL of the API; trailing slashes are stripped before
    /// `/chat/completions` is appended.
    pub api_base: String,
    /// API key handed to the HTTP helper; a blank key results in no
    /// `Authorization` header being sent.
    pub api_key: String,
    /// Model identifier; must not be blank.
    pub model: String,
    /// Prompt text; must not be blank.
    pub prompt: String,
    /// Optional sampling temperature forwarded to the payload builder.
    pub temperature: Option<f32>,
    /// Optional seed forwarded to the payload builder.
    pub seed: Option<u64>,
    /// Optional output-token limit forwarded to the payload builder.
    pub max_output_tokens: Option<usize>,
    /// Additional `(name, value)` HTTP headers added to the request.
    pub extra_headers: Vec<(String, String)>,
}
2445
2446#[derive(Debug, Clone)]
2447pub struct OpenAiCompatChatResponse {
2448 pub model: String,
2449 pub output_text: String,
2450 pub stop_reason: Option<String>,
2451 pub usage: OpenAiCompatUsage,
2452}
2453
/// Input for [`openai_compat_embeddings`]: a batch of texts sent to an
/// OpenAI-compatible `/embeddings` endpoint.
#[derive(Clone, Debug)]
pub struct OpenAiCompatEmbeddingsRequest {
    /// Base URL of the API; trailing slashes are stripped before
    /// `/embeddings` is appended.
    pub api_base: String,
    /// API key handed to the HTTP helper; a blank key results in no
    /// `Authorization` header being sent.
    pub api_key: String,
    /// Embedding model identifier; must not be blank.
    pub model: String,
    /// Texts to embed; at least one is required.
    pub inputs: Vec<String>,
    /// Optional dimensionality forwarded to the payload builder.
    pub dimensions: Option<usize>,
    /// Additional `(name, value)` HTTP headers added to the request.
    pub extra_headers: Vec<(String, String)>,
}
2463
2464#[derive(Debug, Clone)]
2465pub struct OpenAiCompatEmbeddingsResponse {
2466 pub model: String,
2467 pub embeddings: Vec<Vec<f32>>,
2468 pub usage: OpenAiCompatUsage,
2469}
2470
/// Borrows owned `(name, value)` header pairs as `(&str, &str)` pairs,
/// preserving order, so they can be passed to `http_post_json`.
fn extra_header_refs(headers: &[(String, String)]) -> Vec<(&str, &str)> {
    let mut borrowed = Vec::with_capacity(headers.len());
    for (name, value) in headers {
        borrowed.push((name.as_str(), value.as_str()));
    }
    borrowed
}
2477
2478pub fn openai_compat_chat(
2486 request: OpenAiCompatChatRequest,
2487) -> RedDBResult<OpenAiCompatChatResponse> {
2488 if request.model.trim().is_empty() {
2489 return Err(RedDBError::Query(
2490 "openai-compat: model cannot be empty".to_string(),
2491 ));
2492 }
2493 if request.prompt.trim().is_empty() {
2494 return Err(RedDBError::Query(
2495 "openai-compat: prompt cannot be empty".to_string(),
2496 ));
2497 }
2498
2499 let url = format!(
2500 "{}/chat/completions",
2501 request.api_base.trim_end_matches('/')
2502 );
2503 let payload = build_openai_prompt_payload(
2504 &request.model,
2505 &request.prompt,
2506 request.temperature,
2507 request.seed,
2508 request.max_output_tokens,
2509 false,
2510 );
2511
2512 let extra = extra_header_refs(&request.extra_headers);
2513 let (status, body) = http_post_json(&url, &request.api_key, &extra, payload, 120)
2514 .map_err(|err| RedDBError::Query(format!("openai-compat transport error: {err}")))?;
2515
2516 if !(200..300).contains(&status) {
2517 let message = openai_error_message(&body).unwrap_or_else(|| {
2518 if body.trim().is_empty() {
2519 "openai-compat chat request failed".to_string()
2520 } else {
2521 body.clone()
2522 }
2523 });
2524 return Err(RedDBError::Query(format!(
2525 "openai-compat chat request failed (status {status}): {message}"
2526 )));
2527 }
2528
2529 let parsed = parse_openai_prompt_response(&body, &request.model)?;
2530 Ok(OpenAiCompatChatResponse {
2531 model: parsed.model,
2532 output_text: parsed.output_text,
2533 stop_reason: parsed.stop_reason,
2534 usage: OpenAiCompatUsage {
2535 input_tokens: parsed.prompt_tokens,
2536 output_tokens: parsed.completion_tokens,
2537 total_tokens: parsed.total_tokens,
2538 },
2539 })
2540}
2541
2542pub fn openai_compat_embeddings(
2544 request: OpenAiCompatEmbeddingsRequest,
2545) -> RedDBResult<OpenAiCompatEmbeddingsResponse> {
2546 if request.model.trim().is_empty() {
2547 return Err(RedDBError::Query(
2548 "openai-compat: embedding model cannot be empty".to_string(),
2549 ));
2550 }
2551 if request.inputs.is_empty() {
2552 return Err(RedDBError::Query(
2553 "openai-compat: at least one input is required".to_string(),
2554 ));
2555 }
2556
2557 let url = format!("{}/embeddings", request.api_base.trim_end_matches('/'));
2558 let payload =
2559 build_openai_embedding_payload(&request.model, &request.inputs, request.dimensions);
2560
2561 let extra = extra_header_refs(&request.extra_headers);
2562 let (status, body) = http_post_json(&url, &request.api_key, &extra, payload, 90)
2563 .map_err(|err| RedDBError::Query(format!("openai-compat transport error: {err}")))?;
2564
2565 if !(200..300).contains(&status) {
2566 let message = openai_error_message(&body).unwrap_or_else(|| {
2567 if body.trim().is_empty() {
2568 "openai-compat embeddings request failed".to_string()
2569 } else {
2570 body.clone()
2571 }
2572 });
2573 return Err(RedDBError::Query(format!(
2574 "openai-compat embeddings request failed (status {status}): {message}"
2575 )));
2576 }
2577
2578 let parsed = parse_openai_embedding_response(&body)?;
2579 Ok(OpenAiCompatEmbeddingsResponse {
2580 model: parsed.model,
2581 embeddings: parsed.embeddings,
2582 usage: OpenAiCompatUsage {
2583 input_tokens: parsed.prompt_tokens,
2584 output_tokens: None,
2585 total_tokens: parsed.total_tokens,
2586 },
2587 })
2588}
2589
/// Provider "mode" selector. Each variant has a canonical textual token (see
/// [`AiProviderMode::token`]) and is translated to an `AiProvider` by
/// `provider_mode_to_provider`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AiProviderMode {
    /// Token `openai-compat`; maps to `AiProvider::Custom` with an empty
    /// string.
    OpenAiCompat,
    /// Token `openai-native`; maps to `AiProvider::OpenAi`.
    OpenAiNative,
    /// Token `anthropic-native`; maps to `AiProvider::Anthropic`.
    AnthropicNative,
}

impl AiProviderMode {
    /// Returns the canonical hyphenated token for this mode. Every token
    /// returned here is accepted by `parse_provider_mode`.
    pub fn token(&self) -> &'static str {
        match *self {
            AiProviderMode::OpenAiCompat => "openai-compat",
            AiProviderMode::OpenAiNative => "openai-native",
            AiProviderMode::AnthropicNative => "anthropic-native",
        }
    }
}
2620
2621pub fn parse_provider_mode(name: &str) -> Option<AiProviderMode> {
2623 match name.trim().to_ascii_lowercase().as_str() {
2624 "openai-compat" | "openai_compat" | "openaicompat" => Some(AiProviderMode::OpenAiCompat),
2625 "openai-native" | "openai_native" | "openainative" => Some(AiProviderMode::OpenAiNative),
2626 "anthropic-native" | "anthropic_native" | "anthropicnative" => {
2627 Some(AiProviderMode::AnthropicNative)
2628 }
2629 _ => None,
2630 }
2631}
2632
2633pub fn resolve_provider_mode<F>(kv_getter: &F) -> Option<AiProviderMode>
2639where
2640 F: Fn(&str) -> crate::RedDBResult<Option<String>>,
2641{
2642 if let Ok(value) = std::env::var("REDDB_AI_PROVIDER_MODE") {
2643 if let Some(mode) = parse_provider_mode(&value) {
2644 return Some(mode);
2645 }
2646 }
2647 if let Ok(Some(value)) = kv_getter("red.config.ai.provider") {
2648 if let Some(mode) = parse_provider_mode(&value) {
2649 return Some(mode);
2650 }
2651 }
2652 None
2653}
2654
2655pub fn provider_mode_to_provider(mode: AiProviderMode) -> AiProvider {
2659 match mode {
2660 AiProviderMode::OpenAiNative => AiProvider::OpenAi,
2661 AiProviderMode::AnthropicNative => AiProvider::Anthropic,
2662 AiProviderMode::OpenAiCompat => AiProvider::Custom(String::new()),
2663 }
2664}