1use std::io::BufRead;
7use std::time::Duration;
8
9use crate::json::{parse_json, Map, Value as JsonValue};
10use crate::{RedDBError, RedDBResult};
11
12fn http_post_json(
18 url: &str,
19 api_key: &str,
20 extra_headers: &[(&str, &str)],
21 payload: String,
22 read_timeout_secs: u64,
23) -> Result<(u16, String), String> {
24 let agent: ureq::Agent = ureq::Agent::config_builder()
25 .timeout_connect(Some(Duration::from_secs(10)))
26 .timeout_send_request(Some(Duration::from_secs(30)))
27 .timeout_recv_response(Some(Duration::from_secs(read_timeout_secs)))
28 .timeout_recv_body(Some(Duration::from_secs(read_timeout_secs)))
29 .http_status_as_error(false)
30 .build()
31 .into();
32
33 let mut req = agent
34 .post(url)
35 .header("Content-Type", "application/json")
36 .header("Accept", "application/json");
37 for (k, v) in extra_headers {
38 req = req.header(*k, *v);
39 }
40 let trimmed_key = api_key.trim();
41 if !trimmed_key.is_empty() {
42 req = req.header("Authorization", &format!("Bearer {}", trimmed_key));
43 }
44
45 match req.send(payload) {
46 Ok(mut resp) => {
47 let status = resp.status().as_u16();
48 let body = resp
49 .body_mut()
50 .read_to_string()
51 .map_err(|err| format!("failed to read response body: {err}"))?;
52 Ok((status, body))
53 }
54 Err(err) => Err(format!("{err}")),
55 }
56}
57
/// Default OpenAI embedding model used when none is configured.
pub const DEFAULT_OPENAI_EMBEDDING_MODEL: &str = "text-embedding-3-small";
/// Default base URL for the OpenAI REST API.
pub const DEFAULT_OPENAI_API_BASE: &str = "https://api.openai.com/v1";
/// Default OpenAI chat model used for prompt requests.
pub const DEFAULT_OPENAI_PROMPT_MODEL: &str = "gpt-4.1-mini";
/// Default Anthropic model used for prompt requests.
pub const DEFAULT_ANTHROPIC_PROMPT_MODEL: &str = "claude-3-5-haiku-latest";
/// Default base URL for the Anthropic REST API.
pub const DEFAULT_ANTHROPIC_API_BASE: &str = "https://api.anthropic.com/v1";
/// Default value for the required `anthropic-version` request header.
pub const DEFAULT_ANTHROPIC_VERSION: &str = "2023-06-01";
64
/// Parameters for an OpenAI-compatible `/embeddings` request.
#[derive(Debug, Clone)]
pub struct OpenAiEmbeddingRequest {
    // Bearer token; may be blank for key-less OpenAI-compatible endpoints
    // (the Authorization header is then omitted).
    pub api_key: String,
    // Embedding model identifier; must be non-empty.
    pub model: String,
    // Texts to embed; at least one entry is required.
    pub inputs: Vec<String>,
    // Optional output dimensionality (OpenAI `dimensions` field).
    pub dimensions: Option<usize>,
    // API base URL without the `/embeddings` path suffix.
    pub api_base: String,
}
73
/// Parsed result of an OpenAI-compatible `/embeddings` call.
#[derive(Debug, Clone)]
pub struct OpenAiEmbeddingResponse {
    // Static provider tag (e.g. "openai").
    pub provider: &'static str,
    // Model name as reported by the server (or the configured fallback).
    pub model: String,
    // One vector per input, ordered by the server-reported `index`.
    pub embeddings: Vec<Vec<f32>>,
    // `usage.prompt_tokens`, when the server reported it.
    pub prompt_tokens: Option<u64>,
    // `usage.total_tokens`, when the server reported it.
    pub total_tokens: Option<u64>,
}
82
/// Parameters for an OpenAI chat-completions prompt request.
#[derive(Debug, Clone)]
pub struct OpenAiPromptRequest {
    // Bearer token for the OpenAI API.
    pub api_key: String,
    // Chat model identifier; must be non-empty.
    pub model: String,
    // User prompt text; must be non-empty.
    pub prompt: String,
    // Optional sampling temperature.
    pub temperature: Option<f32>,
    // Optional deterministic-sampling seed.
    pub seed: Option<u64>,
    // Optional completion token limit (serialized as `max_tokens`).
    pub max_output_tokens: Option<usize>,
    // API base URL without the `/chat/completions` path suffix.
    pub api_base: String,
    // When true, the async path requests an SSE stream and parses the
    // streamed body instead of a single JSON document.
    pub stream: bool,
}
94
/// Parameters for an Anthropic Messages API prompt request.
#[derive(Debug, Clone)]
pub struct AnthropicPromptRequest {
    // API key sent via the `x-api-key` header; must be non-empty.
    pub api_key: String,
    // Anthropic model identifier; must be non-empty.
    pub model: String,
    // User prompt text; must be non-empty.
    pub prompt: String,
    // Optional sampling temperature.
    pub temperature: Option<f32>,
    // Optional completion token limit; Anthropic requires `max_tokens`,
    // so the payload builder defaults it to 512 when absent.
    pub max_output_tokens: Option<usize>,
    // API base URL without the `/messages` path suffix.
    pub api_base: String,
    // Value for the required `anthropic-version` header.
    pub anthropic_version: String,
}
105
/// Provider-agnostic parsed result of a prompt request.
#[derive(Debug, Clone)]
pub struct AiPromptResponse {
    // Static provider tag ("openai" or "anthropic").
    pub provider: &'static str,
    // Model name as reported by the server, falling back to the requested one.
    pub model: String,
    // Full response text (for streams: all deltas concatenated).
    pub output_text: String,
    // Individual streamed deltas; `Some` only for streaming responses.
    pub output_chunks: Option<Vec<String>>,
    // Input-side token usage, when reported.
    pub prompt_tokens: Option<u64>,
    // Output-side token usage, when reported.
    pub completion_tokens: Option<u64>,
    // Reported total, or prompt + completion when both components are known.
    pub total_tokens: Option<u64>,
    // Provider finish/stop reason (e.g. "stop", "end_turn"), when present.
    pub stop_reason: Option<String>,
}
117
118#[deprecated(
119 since = "1.0.0",
120 note = "use AiBatchClient::embed_batch for embeddings or openai_embeddings_async with AiTransport when token usage metadata is required"
121)]
122pub fn openai_embeddings(request: OpenAiEmbeddingRequest) -> RedDBResult<OpenAiEmbeddingResponse> {
123 if request.model.trim().is_empty() {
124 return Err(RedDBError::Query(
125 "OpenAI embedding model cannot be empty".to_string(),
126 ));
127 }
128 if request.inputs.is_empty() {
129 return Err(RedDBError::Query(
130 "at least one input is required for embeddings".to_string(),
131 ));
132 }
133
134 let url = format!("{}/embeddings", request.api_base.trim_end_matches('/'));
135 let payload =
136 build_openai_embedding_payload(&request.model, &request.inputs, request.dimensions);
137
138 let (status, body) = http_post_json(&url, &request.api_key, &[], payload, 90)
139 .map_err(|err| RedDBError::Query(format!("OpenAI transport error: {err}")))?;
140
141 if !(200..300).contains(&status) {
142 let message = openai_error_message(&body)
143 .unwrap_or_else(|| "OpenAI embeddings request failed".to_string());
144 return Err(RedDBError::Query(format!(
145 "OpenAI embeddings request failed (status {status}): {message}"
146 )));
147 }
148
149 parse_openai_embedding_response(&body)
150}
151
152#[deprecated(since = "1.0.0", note = "use openai_prompt_async with AiTransport")]
153pub fn openai_prompt(request: OpenAiPromptRequest) -> RedDBResult<AiPromptResponse> {
154 if request.model.trim().is_empty() {
155 return Err(RedDBError::Query(
156 "OpenAI prompt model cannot be empty".to_string(),
157 ));
158 }
159 if request.prompt.trim().is_empty() {
160 return Err(RedDBError::Query("prompt cannot be empty".to_string()));
161 }
162
163 let url = format!(
164 "{}/chat/completions",
165 request.api_base.trim_end_matches('/')
166 );
167 let payload = build_openai_prompt_payload(
168 &request.model,
169 &request.prompt,
170 request.temperature,
171 request.seed,
172 request.max_output_tokens,
173 false,
174 );
175
176 let (status, body) = http_post_json(&url, &request.api_key, &[], payload, 120)
177 .map_err(|err| RedDBError::Query(format!("OpenAI transport error: {err}")))?;
178
179 if !(200..300).contains(&status) {
180 let message = openai_error_message(&body)
181 .unwrap_or_else(|| "OpenAI prompt request failed".to_string());
182 return Err(RedDBError::Query(format!(
183 "OpenAI prompt request failed (status {status}): {message}"
184 )));
185 }
186
187 parse_openai_prompt_response(&body, &request.model)
188}
189
190#[deprecated(since = "1.0.0", note = "use anthropic_prompt_async with AiTransport")]
191pub fn anthropic_prompt(request: AnthropicPromptRequest) -> RedDBResult<AiPromptResponse> {
192 if request.api_key.trim().is_empty() {
193 return Err(RedDBError::Query(
194 "Anthropic API key cannot be empty".to_string(),
195 ));
196 }
197 if request.model.trim().is_empty() {
198 return Err(RedDBError::Query(
199 "Anthropic model cannot be empty".to_string(),
200 ));
201 }
202 if request.prompt.trim().is_empty() {
203 return Err(RedDBError::Query("prompt cannot be empty".to_string()));
204 }
205
206 let url = format!("{}/messages", request.api_base.trim_end_matches('/'));
207 let payload = build_anthropic_prompt_payload(
208 &request.model,
209 &request.prompt,
210 request.temperature,
211 request.max_output_tokens,
212 );
213
214 let extra = [
219 ("x-api-key", request.api_key.as_str()),
220 ("anthropic-version", request.anthropic_version.as_str()),
221 ];
222 let (status, body) = http_post_json(&url, "", &extra, payload, 120)
223 .map_err(|err| RedDBError::Query(format!("Anthropic transport error: {err}")))?;
224
225 if !(200..300).contains(&status) {
226 let message = anthropic_error_message(&body)
227 .unwrap_or_else(|| "Anthropic prompt request failed".to_string());
228 return Err(RedDBError::Query(format!(
229 "Anthropic prompt request failed (status {status}): {message}"
230 )));
231 }
232
233 parse_anthropic_prompt_response(&body, &request.model)
234}
235
236pub async fn openai_embeddings_async(
241 transport: &crate::runtime::ai::transport::AiTransport,
242 request: OpenAiEmbeddingRequest,
243) -> RedDBResult<OpenAiEmbeddingResponse> {
244 if request.model.trim().is_empty() {
245 return Err(RedDBError::Query(
246 "OpenAI embedding model cannot be empty".to_string(),
247 ));
248 }
249 if request.inputs.is_empty() {
250 return Err(RedDBError::Query(
251 "at least one input is required for embeddings".to_string(),
252 ));
253 }
254
255 let url = format!("{}/embeddings", request.api_base.trim_end_matches('/'));
256 let payload =
257 build_openai_embedding_payload(&request.model, &request.inputs, request.dimensions);
258 let mut http_req =
259 crate::runtime::ai::transport::AiHttpRequest::post_json("openai-compatible", url, payload);
260 let trimmed_key = request.api_key.trim();
261 if !trimmed_key.is_empty() {
262 http_req = http_req.header("authorization", format!("Bearer {}", trimmed_key));
263 }
264
265 let response = transport
266 .request(http_req)
267 .await
268 .map_err(|e| RedDBError::Query(e.to_string()))?;
269
270 parse_openai_embedding_response(&response.body)
271}
272
273pub async fn openai_prompt_async(
278 transport: &crate::runtime::ai::transport::AiTransport,
279 request: OpenAiPromptRequest,
280) -> RedDBResult<AiPromptResponse> {
281 if request.model.trim().is_empty() {
282 return Err(RedDBError::Query(
283 "OpenAI prompt model cannot be empty".to_string(),
284 ));
285 }
286 if request.prompt.trim().is_empty() {
287 return Err(RedDBError::Query("prompt cannot be empty".to_string()));
288 }
289
290 let url = format!(
291 "{}/chat/completions",
292 request.api_base.trim_end_matches('/')
293 );
294 let payload = build_openai_prompt_payload(
295 &request.model,
296 &request.prompt,
297 request.temperature,
298 request.seed,
299 request.max_output_tokens,
300 request.stream,
301 );
302 let http_req = crate::runtime::ai::transport::AiHttpRequest::post_json("openai", url, payload)
303 .model(request.model.clone())
304 .header("authorization", format!("Bearer {}", request.api_key));
305
306 let response = transport
307 .request(http_req)
308 .await
309 .map_err(|e| RedDBError::Query(e.to_string()))?;
310
311 if request.stream {
312 parse_openai_streaming_prompt_response(&response.body, &request.model)
313 } else {
314 parse_openai_prompt_response(&response.body, &request.model)
315 }
316}
317
/// Synchronous streaming OpenAI chat-completions call over `ureq`.
///
/// Sends a `stream: true` chat request and consumes the Server-Sent-Events
/// response line by line, invoking `on_chunk` for every non-empty content
/// delta as it arrives. Returns the aggregated response (all deltas
/// concatenated) once the stream ends or the `data: [DONE]` sentinel is seen.
///
/// # Errors
/// Returns `RedDBError::Query` for an empty model/prompt, transport errors,
/// non-2xx statuses, invalid JSON in any SSE event, an error propagated from
/// `on_chunk`, or a stream that produced no text content at all.
pub fn openai_prompt_streaming(
    request: OpenAiPromptRequest,
    mut on_chunk: impl FnMut(&str) -> RedDBResult<()>,
) -> RedDBResult<AiPromptResponse> {
    if request.model.trim().is_empty() {
        return Err(RedDBError::Query(
            "OpenAI prompt model cannot be empty".to_string(),
        ));
    }
    if request.prompt.trim().is_empty() {
        return Err(RedDBError::Query("prompt cannot be empty".to_string()));
    }

    let url = format!(
        "{}/chat/completions",
        request.api_base.trim_end_matches('/')
    );
    // `stream = true` also enables `stream_options.include_usage`, so the
    // final SSE event carries token usage.
    let payload = build_openai_prompt_payload(
        &request.model,
        &request.prompt,
        request.temperature,
        request.seed,
        request.max_output_tokens,
        true,
    );

    // Dedicated agent: non-2xx statuses are returned as responses (not
    // transport errors) so the error body can be parsed below.
    let agent: ureq::Agent = ureq::Agent::config_builder()
        .timeout_connect(Some(Duration::from_secs(10)))
        .timeout_send_request(Some(Duration::from_secs(30)))
        .timeout_recv_response(Some(Duration::from_secs(120)))
        .timeout_recv_body(Some(Duration::from_secs(120)))
        .http_status_as_error(false)
        .build()
        .into();

    let mut req = agent
        .post(&url)
        .header("content-type", "application/json")
        .header("accept", "text/event-stream");
    let trimmed_key = request.api_key.trim();
    if !trimmed_key.is_empty() {
        req = req.header("authorization", &format!("Bearer {}", trimmed_key));
    }

    let mut response = req
        .send(payload)
        .map_err(|err| RedDBError::Query(format!("OpenAI transport error: {err}")))?;
    let status = response.status().as_u16();
    if !(200..300).contains(&status) {
        let body = response
            .body_mut()
            .read_to_string()
            .unwrap_or_else(|err| format!("failed to read response body: {err}"));
        let message = openai_error_message(&body)
            .unwrap_or_else(|| "OpenAI prompt request failed".to_string());
        return Err(RedDBError::Query(format!(
            "OpenAI prompt request failed (status {status}): {message}"
        )));
    }

    // State accumulated across SSE events; usage typically arrives only in
    // the final event, content deltas in the intermediate ones.
    let mut model = request.model;
    let mut chunks = Vec::new();
    let mut prompt_tokens = None;
    let mut completion_tokens = None;
    let mut total_tokens = None;
    let mut stop_reason = None;

    // Read the SSE stream line by line, reusing one String buffer.
    let mut reader = std::io::BufReader::new(response.body_mut().as_reader());
    let mut line = String::new();
    loop {
        line.clear();
        let read = reader.read_line(&mut line).map_err(|err| {
            RedDBError::Query(format!("failed to read OpenAI streaming response: {err}"))
        })?;
        if read == 0 {
            // EOF: the server closed the stream.
            break;
        }

        // Only `data:` lines carry payloads; blank keep-alives are skipped.
        let trimmed = line.trim();
        let Some(data) = trimmed.strip_prefix("data:") else {
            continue;
        };
        let data = data.trim();
        if data.is_empty() {
            continue;
        }
        if data == "[DONE]" {
            // OpenAI's explicit end-of-stream sentinel.
            break;
        }

        let parsed = parse_json(data).map_err(|err| {
            RedDBError::Query(format!(
                "invalid OpenAI streaming prompt JSON response: {err}"
            ))
        })?;
        let json = JsonValue::from(parsed);
        if let Some(value) = json.get("model").and_then(JsonValue::as_str) {
            model = value.to_string();
        }
        // Usage fields: keep the previously seen value when an event omits them.
        if let Some(usage) = json.get("usage") {
            prompt_tokens = usage
                .get("prompt_tokens")
                .and_then(JsonValue::as_i64)
                .and_then(|value| u64::try_from(value).ok())
                .or(prompt_tokens);
            completion_tokens = usage
                .get("completion_tokens")
                .and_then(JsonValue::as_i64)
                .and_then(|value| u64::try_from(value).ok())
                .or(completion_tokens);
            total_tokens = usage
                .get("total_tokens")
                .and_then(JsonValue::as_i64)
                .and_then(|value| u64::try_from(value).ok())
                .or(total_tokens);
        }

        let Some(choices) = json.get("choices").and_then(JsonValue::as_array) else {
            continue;
        };
        let Some(first_choice) = choices.first() else {
            continue;
        };
        if let Some(reason) = first_choice
            .get("finish_reason")
            .and_then(JsonValue::as_str)
        {
            stop_reason = Some(reason.to_string());
        }
        // Content delta: forward to the caller first, then keep for the
        // aggregated response.
        if let Some(text) = first_choice
            .get("delta")
            .and_then(|delta| delta.get("content"))
            .and_then(JsonValue::as_str)
        {
            if !text.is_empty() {
                on_chunk(text)?;
                chunks.push(text.to_string());
            }
        }
    }

    if chunks.is_empty() {
        return Err(RedDBError::Query(
            "OpenAI streaming prompt response missing text content".to_string(),
        ));
    }

    let output_text = chunks.concat();
    // Derive a total when the server only reported the two components.
    let total_tokens = total_tokens.or_else(|| match (prompt_tokens, completion_tokens) {
        (Some(prompt), Some(completion)) => Some(prompt.saturating_add(completion)),
        _ => None,
    });

    Ok(AiPromptResponse {
        provider: "openai",
        model,
        output_text,
        output_chunks: Some(chunks),
        prompt_tokens,
        completion_tokens,
        total_tokens,
        stop_reason,
    })
}
487
488pub async fn anthropic_prompt_async(
493 transport: &crate::runtime::ai::transport::AiTransport,
494 request: AnthropicPromptRequest,
495) -> RedDBResult<AiPromptResponse> {
496 if request.api_key.trim().is_empty() {
497 return Err(RedDBError::Query(
498 "Anthropic API key cannot be empty".to_string(),
499 ));
500 }
501 if request.model.trim().is_empty() {
502 return Err(RedDBError::Query(
503 "Anthropic model cannot be empty".to_string(),
504 ));
505 }
506 if request.prompt.trim().is_empty() {
507 return Err(RedDBError::Query("prompt cannot be empty".to_string()));
508 }
509
510 let url = format!("{}/messages", request.api_base.trim_end_matches('/'));
511 let payload = build_anthropic_prompt_payload(
512 &request.model,
513 &request.prompt,
514 request.temperature,
515 request.max_output_tokens,
516 );
517 let http_req =
518 crate::runtime::ai::transport::AiHttpRequest::post_json("anthropic", url, payload)
519 .model(request.model.clone())
520 .header("x-api-key", request.api_key)
521 .header("anthropic-version", request.anthropic_version);
522
523 let response = transport
524 .request(http_req)
525 .await
526 .map_err(|e| RedDBError::Query(e.to_string()))?;
527
528 parse_anthropic_prompt_response(&response.body, &request.model)
529}
530
/// Legacy crate-internal shim: build an embeddings payload without a
/// `dimensions` override.
pub(crate) fn build_embedding_payload(model: &str, inputs: &[String]) -> String {
    build_openai_embedding_payload(model, inputs, None)
}
535
536pub(crate) fn parse_embedding_vectors(body: &str) -> Result<Vec<Vec<f32>>, String> {
538 parse_openai_embedding_response(body)
539 .map(|r| r.embeddings)
540 .map_err(|e| e.to_string())
541}
542
543pub(crate) fn parse_embedding_response(body: &str) -> Result<OpenAiEmbeddingResponse, String> {
544 parse_openai_embedding_response(body).map_err(|e| e.to_string())
545}
546
547fn build_openai_embedding_payload(
548 model: &str,
549 inputs: &[String],
550 dimensions: Option<usize>,
551) -> String {
552 let mut object = Map::new();
553 object.insert("model".to_string(), JsonValue::String(model.to_string()));
554 if inputs.len() == 1 {
555 object.insert("input".to_string(), JsonValue::String(inputs[0].clone()));
556 } else {
557 object.insert(
558 "input".to_string(),
559 JsonValue::Array(inputs.iter().cloned().map(JsonValue::String).collect()),
560 );
561 }
562 if let Some(dimensions) = dimensions {
563 object.insert(
564 "dimensions".to_string(),
565 JsonValue::Number(dimensions as f64),
566 );
567 }
568 object.insert(
569 "encoding_format".to_string(),
570 JsonValue::String("float".to_string()),
571 );
572 JsonValue::Object(object).to_string_compact()
573}
574
/// Extract a human-readable message from an OpenAI error body, if present.
fn openai_error_message(body: &str) -> Option<String> {
    provider_error_message(body)
}
578
/// Extract a human-readable message from an Anthropic error body, if present.
fn anthropic_error_message(body: &str) -> Option<String> {
    provider_error_message(body)
}
582
583fn provider_error_message(body: &str) -> Option<String> {
584 let parsed = parse_json(body).ok().map(JsonValue::from)?;
585 let error = parsed.get("error")?;
586 if let Some(message) = error.get("message").and_then(JsonValue::as_str) {
587 let trimmed = message.trim();
588 if !trimmed.is_empty() {
589 return Some(trimmed.to_string());
590 }
591 }
592 None
593}
594
595fn build_openai_prompt_payload(
596 model: &str,
597 prompt: &str,
598 temperature: Option<f32>,
599 seed: Option<u64>,
600 max_output_tokens: Option<usize>,
601 stream: bool,
602) -> String {
603 let mut object = Map::new();
604 object.insert("model".to_string(), JsonValue::String(model.to_string()));
605
606 let mut message = Map::new();
607 message.insert("role".to_string(), JsonValue::String("user".to_string()));
608 message.insert("content".to_string(), JsonValue::String(prompt.to_string()));
609 object.insert(
610 "messages".to_string(),
611 JsonValue::Array(vec![JsonValue::Object(message)]),
612 );
613
614 if let Some(temperature) = temperature {
615 object.insert(
616 "temperature".to_string(),
617 JsonValue::Number(temperature as f64),
618 );
619 }
620
621 if let Some(seed) = seed {
622 object.insert("seed".to_string(), JsonValue::Number(seed as f64));
623 }
624
625 if let Some(max_output_tokens) = max_output_tokens {
626 object.insert(
627 "max_tokens".to_string(),
628 JsonValue::Number(max_output_tokens as f64),
629 );
630 }
631
632 if stream {
633 object.insert("stream".to_string(), JsonValue::Bool(true));
634 let mut options = Map::new();
635 options.insert("include_usage".to_string(), JsonValue::Bool(true));
636 object.insert("stream_options".to_string(), JsonValue::Object(options));
637 }
638
639 JsonValue::Object(object).to_string_compact()
640}
641
642fn build_anthropic_prompt_payload(
643 model: &str,
644 prompt: &str,
645 temperature: Option<f32>,
646 max_output_tokens: Option<usize>,
647) -> String {
648 let mut object = Map::new();
649 object.insert("model".to_string(), JsonValue::String(model.to_string()));
650 object.insert(
651 "max_tokens".to_string(),
652 JsonValue::Number(max_output_tokens.unwrap_or(512) as f64),
653 );
654
655 let mut message = Map::new();
656 message.insert("role".to_string(), JsonValue::String("user".to_string()));
657 message.insert("content".to_string(), JsonValue::String(prompt.to_string()));
658 object.insert(
659 "messages".to_string(),
660 JsonValue::Array(vec![JsonValue::Object(message)]),
661 );
662
663 if let Some(temperature) = temperature {
664 object.insert(
665 "temperature".to_string(),
666 JsonValue::Number(temperature as f64),
667 );
668 }
669
670 JsonValue::Object(object).to_string_compact()
671}
672
673fn extract_text_from_parts(parts: &[JsonValue]) -> Option<String> {
674 let mut chunks = Vec::new();
675 for part in parts {
676 if let Some(text) = part.as_str() {
677 let trimmed = text.trim();
678 if !trimmed.is_empty() {
679 chunks.push(trimmed.to_string());
680 }
681 continue;
682 }
683
684 let Some(object) = part.as_object() else {
685 continue;
686 };
687 let Some(text) = object.get("text").and_then(JsonValue::as_str) else {
688 continue;
689 };
690 let trimmed = text.trim();
691 if !trimmed.is_empty() {
692 chunks.push(trimmed.to_string());
693 }
694 }
695
696 if chunks.is_empty() {
697 None
698 } else {
699 Some(chunks.join("\n\n"))
700 }
701}
702
/// Parse a non-streaming OpenAI chat-completions response body.
///
/// Falls back to `requested_model` when the body omits `model`. Only the
/// first choice is inspected; its `message.content` may be either a plain
/// string or a content-parts array.
///
/// # Errors
/// Returns `RedDBError::Query` when the body is not valid JSON, has no
/// `choices`, or the first choice carries no non-empty text.
fn parse_openai_prompt_response(
    body: &str,
    requested_model: &str,
) -> RedDBResult<AiPromptResponse> {
    let parsed = parse_json(body)
        .map_err(|err| RedDBError::Query(format!("invalid OpenAI prompt JSON response: {err}")))?;
    let json = JsonValue::from(parsed);

    let model = json
        .get("model")
        .and_then(JsonValue::as_str)
        .unwrap_or(requested_model)
        .to_string();

    let Some(choices) = json.get("choices").and_then(JsonValue::as_array) else {
        return Err(RedDBError::Query(
            "OpenAI prompt response missing 'choices' array".to_string(),
        ));
    };
    let Some(first_choice) = choices.first() else {
        return Err(RedDBError::Query(
            "OpenAI prompt response contains no choices".to_string(),
        ));
    };

    // Prefer the plain-string `content`; fall back to the content-parts
    // array form when the string is absent or blank.
    let output_text = first_choice
        .get("message")
        .and_then(|message| {
            if let Some(text) = message.get("content").and_then(JsonValue::as_str) {
                let trimmed = text.trim();
                if !trimmed.is_empty() {
                    return Some(trimmed.to_string());
                }
            }
            message
                .get("content")
                .and_then(JsonValue::as_array)
                .and_then(extract_text_from_parts)
        })
        .ok_or_else(|| {
            RedDBError::Query("OpenAI prompt response missing text content".to_string())
        })?;

    // Token usage is optional; negative values are discarded by the
    // i64 -> u64 conversion.
    let prompt_tokens = json
        .get("usage")
        .and_then(|usage| usage.get("prompt_tokens"))
        .and_then(JsonValue::as_i64)
        .and_then(|value| u64::try_from(value).ok());
    let completion_tokens = json
        .get("usage")
        .and_then(|usage| usage.get("completion_tokens"))
        .and_then(JsonValue::as_i64)
        .and_then(|value| u64::try_from(value).ok());
    // Fall back to prompt + completion when the server omits the total.
    let total_tokens = json
        .get("usage")
        .and_then(|usage| usage.get("total_tokens"))
        .and_then(JsonValue::as_i64)
        .and_then(|value| u64::try_from(value).ok())
        .or_else(|| match (prompt_tokens, completion_tokens) {
            (Some(prompt), Some(completion)) => Some(prompt.saturating_add(completion)),
            _ => None,
        });

    let stop_reason = first_choice
        .get("finish_reason")
        .and_then(JsonValue::as_str)
        .map(str::to_string);

    Ok(AiPromptResponse {
        provider: "openai",
        model,
        output_text,
        output_chunks: None,
        prompt_tokens,
        completion_tokens,
        total_tokens,
        stop_reason,
    })
}
782
/// Parse a fully-buffered OpenAI SSE stream body into an aggregated response.
///
/// Used by the async path, which receives the complete stream as one string;
/// the same event-by-event logic runs incrementally in
/// `openai_prompt_streaming`. Each `data:` line is decoded, deltas are
/// concatenated, and usage/finish-reason from later events overwrite
/// earlier ones.
///
/// # Errors
/// Returns `RedDBError::Query` when any event carries invalid JSON or the
/// stream produced no text content at all.
fn parse_openai_streaming_prompt_response(
    body: &str,
    requested_model: &str,
) -> RedDBResult<AiPromptResponse> {
    let mut model = requested_model.to_string();
    let mut chunks = Vec::new();
    let mut prompt_tokens = None;
    let mut completion_tokens = None;
    let mut total_tokens = None;
    let mut stop_reason = None;

    for line in body.lines() {
        // Only `data:` lines carry payloads; blank keep-alives are skipped.
        let line = line.trim();
        let Some(data) = line.strip_prefix("data:") else {
            continue;
        };
        let data = data.trim();
        if data.is_empty() {
            continue;
        }
        if data == "[DONE]" {
            // OpenAI's explicit end-of-stream sentinel.
            break;
        }

        let parsed = parse_json(data).map_err(|err| {
            RedDBError::Query(format!(
                "invalid OpenAI streaming prompt JSON response: {err}"
            ))
        })?;
        let json = JsonValue::from(parsed);
        if let Some(value) = json.get("model").and_then(JsonValue::as_str) {
            model = value.to_string();
        }
        // Usage fields: keep the previously seen value when an event omits them.
        if let Some(usage) = json.get("usage") {
            prompt_tokens = usage
                .get("prompt_tokens")
                .and_then(JsonValue::as_i64)
                .and_then(|value| u64::try_from(value).ok())
                .or(prompt_tokens);
            completion_tokens = usage
                .get("completion_tokens")
                .and_then(JsonValue::as_i64)
                .and_then(|value| u64::try_from(value).ok())
                .or(completion_tokens);
            total_tokens = usage
                .get("total_tokens")
                .and_then(JsonValue::as_i64)
                .and_then(|value| u64::try_from(value).ok())
                .or(total_tokens);
        }

        let Some(choices) = json.get("choices").and_then(JsonValue::as_array) else {
            continue;
        };
        let Some(first_choice) = choices.first() else {
            continue;
        };
        if let Some(reason) = first_choice
            .get("finish_reason")
            .and_then(JsonValue::as_str)
        {
            stop_reason = Some(reason.to_string());
        }
        if let Some(text) = first_choice
            .get("delta")
            .and_then(|delta| delta.get("content"))
            .and_then(JsonValue::as_str)
        {
            if !text.is_empty() {
                chunks.push(text.to_string());
            }
        }
    }

    if chunks.is_empty() {
        return Err(RedDBError::Query(
            "OpenAI streaming prompt response missing text content".to_string(),
        ));
    }

    let output_text = chunks.concat();
    // Derive a total when the server only reported the two components.
    let total_tokens = total_tokens.or_else(|| match (prompt_tokens, completion_tokens) {
        (Some(prompt), Some(completion)) => Some(prompt.saturating_add(completion)),
        _ => None,
    });

    Ok(AiPromptResponse {
        provider: "openai",
        model,
        output_text,
        output_chunks: Some(chunks),
        prompt_tokens,
        completion_tokens,
        total_tokens,
        stop_reason,
    })
}
880
881fn parse_anthropic_prompt_response(
882 body: &str,
883 requested_model: &str,
884) -> RedDBResult<AiPromptResponse> {
885 let parsed = parse_json(body).map_err(|err| {
886 RedDBError::Query(format!("invalid Anthropic prompt JSON response: {err}"))
887 })?;
888 let json = JsonValue::from(parsed);
889
890 let model = json
891 .get("model")
892 .and_then(JsonValue::as_str)
893 .unwrap_or(requested_model)
894 .to_string();
895
896 let Some(content_parts) = json.get("content").and_then(JsonValue::as_array) else {
897 return Err(RedDBError::Query(
898 "Anthropic prompt response missing 'content' array".to_string(),
899 ));
900 };
901
902 let output_text = extract_text_from_parts(content_parts).ok_or_else(|| {
903 RedDBError::Query("Anthropic prompt response missing text content".to_string())
904 })?;
905
906 let prompt_tokens = json
907 .get("usage")
908 .and_then(|usage| usage.get("input_tokens"))
909 .and_then(JsonValue::as_i64)
910 .and_then(|value| u64::try_from(value).ok());
911 let completion_tokens = json
912 .get("usage")
913 .and_then(|usage| usage.get("output_tokens"))
914 .and_then(JsonValue::as_i64)
915 .and_then(|value| u64::try_from(value).ok());
916 let total_tokens = match (prompt_tokens, completion_tokens) {
917 (Some(prompt), Some(completion)) => Some(prompt.saturating_add(completion)),
918 _ => None,
919 };
920
921 let stop_reason = json
922 .get("stop_reason")
923 .and_then(JsonValue::as_str)
924 .map(str::to_string);
925
926 Ok(AiPromptResponse {
927 provider: "anthropic",
928 model,
929 output_text,
930 output_chunks: None,
931 prompt_tokens,
932 completion_tokens,
933 total_tokens,
934 stop_reason,
935 })
936}
937
/// Parse an OpenAI-compatible `/embeddings` response body.
///
/// Vectors are reordered by the server-reported `index` field so they match
/// the original input order even when the server returns them shuffled.
///
/// NOTE(review): when the body omits `model`, this falls back to
/// `DEFAULT_OPENAI_EMBEDDING_MODEL` rather than the model that was actually
/// requested — confirm callers do not rely on the reported model.
///
/// # Errors
/// Returns `RedDBError::Query` for invalid JSON, a missing `data` array, a
/// missing/empty `embedding` vector, or non-numeric embedding values.
fn parse_openai_embedding_response(body: &str) -> RedDBResult<OpenAiEmbeddingResponse> {
    let parsed = parse_json(body).map_err(|err| {
        RedDBError::Query(format!("invalid OpenAI embeddings JSON response: {err}"))
    })?;
    let json = JsonValue::from(parsed);

    let model = json
        .get("model")
        .and_then(JsonValue::as_str)
        .unwrap_or(DEFAULT_OPENAI_EMBEDDING_MODEL)
        .to_string();

    let Some(data) = json.get("data").and_then(JsonValue::as_array) else {
        return Err(RedDBError::Query(
            "OpenAI response missing 'data' array".to_string(),
        ));
    };

    // Collect (index, vector) pairs; a missing/invalid index falls back to
    // the item's position in the array.
    let mut rows: Vec<(usize, Vec<f32>)> = Vec::with_capacity(data.len());
    for (position, item) in data.iter().enumerate() {
        let index = item
            .get("index")
            .and_then(JsonValue::as_i64)
            .and_then(|value| usize::try_from(value).ok())
            .unwrap_or(position);

        let Some(embedding_values) = item.get("embedding").and_then(JsonValue::as_array) else {
            return Err(RedDBError::Query(
                "OpenAI response contains item without 'embedding' array".to_string(),
            ));
        };
        if embedding_values.is_empty() {
            return Err(RedDBError::Query(
                "OpenAI response contains empty embedding vector".to_string(),
            ));
        }

        let mut embedding = Vec::with_capacity(embedding_values.len());
        for value in embedding_values {
            let Some(number) = value.as_f64() else {
                return Err(RedDBError::Query(
                    "OpenAI response contains non-numeric embedding value".to_string(),
                ));
            };
            embedding.push(number as f32);
        }
        rows.push((index, embedding));
    }
    // Restore input order before dropping the indices.
    rows.sort_by_key(|(index, _)| *index);
    let embeddings = rows.into_iter().map(|(_, embedding)| embedding).collect();

    let prompt_tokens = json
        .get("usage")
        .and_then(|usage| usage.get("prompt_tokens"))
        .and_then(JsonValue::as_i64)
        .and_then(|value| u64::try_from(value).ok());
    let total_tokens = json
        .get("usage")
        .and_then(|usage| usage.get("total_tokens"))
        .and_then(JsonValue::as_i64)
        .and_then(|value| u64::try_from(value).ok());

    Ok(OpenAiEmbeddingResponse {
        provider: "openai",
        model,
        embeddings,
        prompt_tokens,
        total_tokens,
    })
}
1008
1009#[cfg(test)]
1010mod tests {
1011 use super::*;
1012
1013 #[test]
1014 fn parse_openai_embedding_response_extracts_vectors() {
1015 let body = r#"{
1016 "object":"list",
1017 "data":[
1018 {"object":"embedding","index":1,"embedding":[0.3,0.4]},
1019 {"object":"embedding","index":0,"embedding":[0.1,0.2]}
1020 ],
1021 "model":"text-embedding-3-small",
1022 "usage":{"prompt_tokens":12,"total_tokens":12}
1023 }"#;
1024
1025 let result = parse_openai_embedding_response(body).expect("response should parse");
1026 assert_eq!(result.provider, "openai");
1027 assert_eq!(result.model, "text-embedding-3-small");
1028 assert_eq!(result.embeddings.len(), 2);
1029 assert_eq!(result.embeddings[0], vec![0.1, 0.2]);
1030 assert_eq!(result.embeddings[1], vec![0.3, 0.4]);
1031 assert_eq!(result.prompt_tokens, Some(12));
1032 assert_eq!(result.total_tokens, Some(12));
1033 }
1034
1035 #[test]
1036 fn openai_error_message_extracts_nested_message() {
1037 let body = r#"{"error":{"message":"bad api key","type":"invalid_request_error"}}"#;
1038 assert_eq!(openai_error_message(body).as_deref(), Some("bad api key"));
1039 }
1040
1041 #[test]
1042 fn parse_openai_prompt_response_extracts_text_and_usage() {
1043 let body = r#"{
1044 "id":"chatcmpl_1",
1045 "object":"chat.completion",
1046 "model":"gpt-4.1-mini",
1047 "choices":[
1048 {
1049 "index":0,
1050 "finish_reason":"stop",
1051 "message":{"role":"assistant","content":"Resumo pronto."}
1052 }
1053 ],
1054 "usage":{"prompt_tokens":10,"completion_tokens":4,"total_tokens":14}
1055 }"#;
1056
1057 let parsed =
1058 parse_openai_prompt_response(body, DEFAULT_OPENAI_PROMPT_MODEL).expect("parse");
1059 assert_eq!(parsed.provider, "openai");
1060 assert_eq!(parsed.model, "gpt-4.1-mini");
1061 assert_eq!(parsed.output_text, "Resumo pronto.");
1062 assert_eq!(parsed.prompt_tokens, Some(10));
1063 assert_eq!(parsed.completion_tokens, Some(4));
1064 assert_eq!(parsed.total_tokens, Some(14));
1065 assert_eq!(parsed.stop_reason.as_deref(), Some("stop"));
1066 }
1067
    /// Anthropic messages report only input/output token counts; the parser
    /// is expected to derive the total itself (11 + 5 = 16 below).
    #[test]
    fn parse_anthropic_prompt_response_extracts_text_and_usage() {
        let body = r#"{
            "id":"msg_1",
            "model":"claude-3-5-haiku-latest",
            "type":"message",
            "content":[{"type":"text","text":"Action complete."}],
            "usage":{"input_tokens":11,"output_tokens":5},
            "stop_reason":"end_turn"
        }"#;

        let parsed =
            parse_anthropic_prompt_response(body, DEFAULT_ANTHROPIC_PROMPT_MODEL).expect("parse");
        assert_eq!(parsed.provider, "anthropic");
        assert_eq!(parsed.model, "claude-3-5-haiku-latest");
        assert_eq!(parsed.output_text, "Action complete.");
        assert_eq!(parsed.prompt_tokens, Some(11));
        assert_eq!(parsed.completion_tokens, Some(5));
        // Total is summed from input + output, not read from the body.
        assert_eq!(parsed.total_tokens, Some(16));
        assert_eq!(parsed.stop_reason.as_deref(), Some("end_turn"));
    }
1089
1090 #[test]
1091 fn resolve_api_key_prefers_vault_secret_over_legacy_config() {
1092 let provider = AiProvider::OpenAi;
1093 let alias = "vault_unit_alias";
1094 let secret_path = ai_api_secret_path(&provider, alias);
1095 let legacy_key = ai_api_legacy_config_key(&provider, alias);
1096
1097 let resolved = resolve_api_key(&provider, Some(alias), |key| {
1098 if key == secret_path {
1099 Ok(Some("vault-key".to_string()))
1100 } else if key == legacy_key {
1101 Ok(Some("legacy-key".to_string()))
1102 } else {
1103 Ok(None)
1104 }
1105 })
1106 .expect("resolve");
1107
1108 assert_eq!(resolved, "vault-key");
1109 }
1110
1111 #[test]
1112 fn resolve_api_key_uses_default_vault_secret_path() {
1113 let provider = AiProvider::OpenAi;
1114 let secret_path = ai_api_secret_path(&provider, "default");
1115
1116 let resolved = resolve_api_key(&provider, None, |key| {
1117 if key == secret_path {
1118 Ok(Some("default-vault-key".to_string()))
1119 } else {
1120 Ok(None)
1121 }
1122 })
1123 .expect("resolve");
1124
1125 assert_eq!(resolved, "default-vault-key");
1126 }
1127
    /// Optional sampling knobs (temperature, seed, max_tokens) must be
    /// serialized into the payload when supplied.
    #[test]
    fn openai_prompt_payload_includes_temperature_and_seed_when_present() {
        let payload = build_openai_prompt_payload(
            "gpt-4.1-mini",
            "hello",
            Some(0.0),
            Some(42),
            Some(128),
            false,
        );
        let parsed = JsonValue::from(parse_json(&payload).expect("valid json"));

        assert_eq!(
            parsed.get("temperature").and_then(JsonValue::as_f64),
            Some(0.0)
        );
        assert_eq!(parsed.get("seed").and_then(JsonValue::as_u64), Some(42));
        assert_eq!(
            parsed.get("max_tokens").and_then(JsonValue::as_u64),
            Some(128)
        );
    }
1150
    /// Absent options must be omitted entirely (not serialized as null):
    /// no `seed` key, and no `stream` key when streaming is disabled.
    #[test]
    fn openai_prompt_payload_omits_seed_when_none() {
        let payload =
            build_openai_prompt_payload("gpt-4.1-mini", "hello", Some(0.0), None, None, false);
        let parsed = JsonValue::from(parse_json(&payload).expect("valid json"));

        assert!(parsed.get("seed").is_none());
        assert!(parsed.get("stream").is_none());
        assert_eq!(
            parsed.get("temperature").and_then(JsonValue::as_f64),
            Some(0.0)
        );
    }
1164
    /// Streaming mode must set `stream:true` and request usage reporting
    /// via `stream_options.include_usage`.
    #[test]
    fn openai_prompt_payload_enables_stream_options() {
        let payload =
            build_openai_prompt_payload("gpt-4.1-mini", "hello", Some(0.0), None, None, true);
        let parsed = JsonValue::from(parse_json(&payload).expect("valid json"));

        assert_eq!(
            parsed.get("stream").and_then(JsonValue::as_bool),
            Some(true)
        );
        assert_eq!(
            parsed
                .get("stream_options")
                .and_then(|value| value.get("include_usage"))
                .and_then(JsonValue::as_bool),
            Some(true)
        );
    }
1183
    /// SSE stream handling: delta chunks concatenate into `output_text`
    /// (and are kept individually in `output_chunks`), the final frame
    /// supplies usage and the stop reason, and `[DONE]` terminates cleanly.
    #[test]
    fn openai_streaming_prompt_response_collects_delta_chunks() {
        let body = concat!(
            "data: {\"model\":\"gpt-test\",\"choices\":[{\"delta\":{\"content\":\"login \"},\"finish_reason\":null}]}\n\n",
            "data: {\"model\":\"gpt-test\",\"choices\":[{\"delta\":{\"content\":\"failed\"},\"finish_reason\":null}]}\n\n",
            "data: {\"model\":\"gpt-test\",\"choices\":[{\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":12,\"completion_tokens\":2,\"total_tokens\":14}}\n\n",
            "data: [DONE]\n\n",
        );
        let parsed = parse_openai_streaming_prompt_response(body, "fallback").unwrap();

        assert_eq!(parsed.model, "gpt-test");
        assert_eq!(parsed.output_text, "login failed");
        assert_eq!(
            parsed.output_chunks.as_deref(),
            Some(["login ".to_string(), "failed".to_string()].as_slice())
        );
        assert_eq!(parsed.prompt_tokens, Some(12));
        assert_eq!(parsed.completion_tokens, Some(2));
        assert_eq!(parsed.total_tokens, Some(14));
        assert_eq!(parsed.stop_reason.as_deref(), Some("stop"));
    }
1205
    /// A whitespace-only model name is rejected with a validation error
    /// (no network request should be needed to fail).
    #[tokio::test]
    async fn openai_prompt_async_rejects_empty_model() {
        let transport = crate::runtime::ai::transport::AiTransport::new(Default::default());
        let request = OpenAiPromptRequest {
            api_key: "key".to_string(),
            model: " ".to_string(),
            prompt: "hello".to_string(),
            temperature: None,
            seed: None,
            max_output_tokens: None,
            api_base: "https://api.openai.com/v1".to_string(),
            stream: false,
        };
        let err = openai_prompt_async(&transport, request).await.unwrap_err();
        assert!(err.to_string().contains("model cannot be empty"));
    }
1222
    /// An empty prompt string is rejected with a validation error before
    /// any request is sent.
    #[tokio::test]
    async fn openai_prompt_async_rejects_empty_prompt() {
        let transport = crate::runtime::ai::transport::AiTransport::new(Default::default());
        let request = OpenAiPromptRequest {
            api_key: "key".to_string(),
            model: "gpt-4.1-mini".to_string(),
            prompt: "".to_string(),
            temperature: None,
            seed: None,
            max_output_tokens: None,
            api_base: "https://api.openai.com/v1".to_string(),
            stream: false,
        };
        let err = openai_prompt_async(&transport, request).await.unwrap_err();
        assert!(err.to_string().contains("prompt cannot be empty"));
    }
1239
    /// A whitespace-only API key is rejected by the Anthropic path with a
    /// validation error before any request is sent.
    #[tokio::test]
    async fn anthropic_prompt_async_rejects_empty_api_key() {
        let transport = crate::runtime::ai::transport::AiTransport::new(Default::default());
        let request = AnthropicPromptRequest {
            api_key: " ".to_string(),
            model: "claude-3-5-haiku-latest".to_string(),
            prompt: "hello".to_string(),
            temperature: None,
            max_output_tokens: None,
            api_base: "https://api.anthropic.com/v1".to_string(),
            anthropic_version: DEFAULT_ANTHROPIC_VERSION.to_string(),
        };
        let err = anthropic_prompt_async(&transport, request)
            .await
            .unwrap_err();
        assert!(err.to_string().contains("API key cannot be empty"));
    }
1257}
1258
/// Identity of an AI service RedDB can talk to.
///
/// Most variants speak the OpenAI wire format (see `is_openai_compatible`);
/// `Anthropic` and `HuggingFace` have their own request shapes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AiProvider {
    OpenAi,
    Anthropic,
    Groq,
    OpenRouter,
    Together,
    Venice,
    /// Local Ollama server; OpenAI-compatible and requires no API key.
    Ollama,
    DeepSeek,
    HuggingFace,
    /// In-process inference; only functional with the `local-models` build feature.
    Local,
    /// Any other OpenAI-compatible endpoint, identified by its base URL.
    Custom(String),
}
1278
impl AiProvider {
    /// Canonical lowercase identifier used in env-var names, KV/config key
    /// paths, and response payloads. For `Custom`, the stored base URL.
    pub fn token(&self) -> &str {
        match self {
            Self::OpenAi => "openai",
            Self::Anthropic => "anthropic",
            Self::Groq => "groq",
            Self::OpenRouter => "openrouter",
            Self::Together => "together",
            Self::Venice => "venice",
            Self::Ollama => "ollama",
            Self::DeepSeek => "deepseek",
            Self::HuggingFace => "huggingface",
            Self::Local => "local",
            Self::Custom(name) => name.as_str(),
        }
    }

    /// Prompt/chat model used when neither env nor config names one.
    pub fn default_prompt_model(&self) -> &str {
        match self {
            Self::OpenAi => DEFAULT_OPENAI_PROMPT_MODEL,
            Self::Anthropic => DEFAULT_ANTHROPIC_PROMPT_MODEL,
            Self::Groq => "llama-3.3-70b-versatile",
            Self::OpenRouter => "auto",
            Self::Together => "meta-llama/Meta-Llama-3-8B-Instruct",
            Self::Venice => "llama-3.3-70b",
            Self::Ollama => "llama3",
            Self::DeepSeek => "deepseek-chat",
            Self::HuggingFace => "mistralai/Mistral-7B-Instruct-v0.3",
            // NOTE(review): this is an embedding model used as the *prompt*
            // default for Local — confirm that is intentional.
            Self::Local => "sentence-transformers/all-MiniLM-L6-v2",
            Self::Custom(_) => DEFAULT_OPENAI_PROMPT_MODEL,
        }
    }

    /// Env var for a provider-specific prompt-model override,
    /// e.g. `REDDB_OPENAI_PROMPT_MODEL`.
    pub fn prompt_model_env_name(&self) -> String {
        format!("REDDB_{}_PROMPT_MODEL", self.token().to_ascii_uppercase())
    }

    /// Embedding model used when the request/config does not name one.
    pub fn default_embedding_model(&self) -> &str {
        match self {
            Self::Ollama => "nomic-embed-text",
            Self::HuggingFace | Self::Local => "sentence-transformers/all-MiniLM-L6-v2",
            _ => DEFAULT_OPENAI_EMBEDDING_MODEL,
        }
    }

    /// Built-in API base URL. `Custom` carries its own URL; `Local` uses the
    /// sentinel string "local" (no HTTP endpoint).
    pub fn default_api_base(&self) -> &str {
        match self {
            Self::OpenAi => DEFAULT_OPENAI_API_BASE,
            Self::Anthropic => DEFAULT_ANTHROPIC_API_BASE,
            Self::Groq => "https://api.groq.com/openai/v1",
            Self::OpenRouter => "https://openrouter.ai/api/v1",
            Self::Together => "https://api.together.xyz/v1",
            Self::Venice => "https://api.venice.ai/api/v1",
            Self::Ollama => "http://localhost:11434/v1",
            Self::DeepSeek => "https://api.deepseek.com/v1",
            Self::HuggingFace => "https://api-inference.huggingface.co",
            Self::Local => "local",
            Self::Custom(base) => base.as_str(),
        }
    }

    /// Env var for an API-base override, e.g. `REDDB_OPENAI_API_BASE`.
    pub fn api_base_env_name(&self) -> String {
        format!("REDDB_{}_API_BASE", self.token().to_ascii_uppercase())
    }

    /// Env var holding the provider-wide API key, e.g. `REDDB_OPENAI_API_KEY`.
    pub fn default_key_env_name(&self) -> String {
        format!("REDDB_{}_API_KEY", self.token().to_ascii_uppercase())
    }

    /// Env var for an alias-scoped API key; the alias is uppercased with
    /// non-alphanumerics collapsed to `_` (see `normalize_alias_token`).
    pub fn alias_key_env_name(&self, alias: &str) -> String {
        let normalized = normalize_alias_token(alias);
        format!(
            "REDDB_{}_API_KEY_{normalized}",
            self.token().to_ascii_uppercase()
        )
    }

    /// Resolve the API base: non-empty env override wins, else the built-in
    /// default.
    pub fn resolve_api_base(&self) -> String {
        if let Ok(value) = std::env::var(self.api_base_env_name()) {
            let value = value.trim().to_string();
            if !value.is_empty() {
                return value;
            }
        }
        self.default_api_base().to_string()
    }

    /// Like `resolve_api_base`, but also consults the per-alias KV config key
    /// `red.config.ai.<provider>.<alias>.base_url` between the env override
    /// and the built-in default. KV errors are treated as "not configured".
    pub fn resolve_api_base_with_kv<F>(&self, alias: &str, kv_getter: &F) -> String
    where
        F: Fn(&str) -> crate::RedDBResult<Option<String>>,
    {
        if let Ok(value) = std::env::var(self.api_base_env_name()) {
            let value = value.trim().to_string();
            if !value.is_empty() {
                return value;
            }
        }
        let kv_key = format!("red.config.ai.{}.{alias}.base_url", self.token());
        if let Ok(Some(value)) = kv_getter(&kv_key) {
            let value = value.trim().to_string();
            if !value.is_empty() {
                return value;
            }
        }
        self.default_api_base().to_string()
    }

    /// Whether the provider speaks the OpenAI wire format for chat and
    /// embeddings requests.
    pub fn is_openai_compatible(&self) -> bool {
        matches!(
            self,
            Self::OpenAi
                | Self::Groq
                | Self::OpenRouter
                | Self::Together
                | Self::Venice
                | Self::Ollama
                | Self::DeepSeek
                | Self::Custom(_)
        )
    }

    /// Ollama and Local run without credentials; everything else needs a key.
    pub fn requires_api_key(&self) -> bool {
        !matches!(self, Self::Ollama | Self::Local)
    }
}
1409
1410pub fn parse_provider(name: &str) -> crate::RedDBResult<AiProvider> {
1412 match name.trim().to_ascii_lowercase().as_str() {
1413 "openai" => Ok(AiProvider::OpenAi),
1414 "anthropic" => Ok(AiProvider::Anthropic),
1415 "groq" => Ok(AiProvider::Groq),
1416 "openrouter" | "open_router" => Ok(AiProvider::OpenRouter),
1417 "together" => Ok(AiProvider::Together),
1418 "venice" => Ok(AiProvider::Venice),
1419 "ollama" => Ok(AiProvider::Ollama),
1420 "deepseek" | "deep_seek" => Ok(AiProvider::DeepSeek),
1421 "huggingface" | "hf" => Ok(AiProvider::HuggingFace),
1422 "local" => Ok(AiProvider::Local),
1423 other => {
1424 if other.starts_with("http://") || other.starts_with("https://") {
1426 Ok(AiProvider::Custom(other.to_string()))
1427 } else {
1428 Err(crate::RedDBError::Query(format!(
1429 "unsupported AI provider '{other}'; expected: openai, anthropic, groq, \
1430 openrouter, together, venice, ollama, deepseek, huggingface, local"
1431 )))
1432 }
1433 }
1434 }
1435}
1436
1437pub fn resolve_default_provider<F>(kv_getter: &F) -> AiProvider
1442where
1443 F: Fn(&str) -> crate::RedDBResult<Option<String>>,
1444{
1445 if let Ok(value) = std::env::var("REDDB_AI_PROVIDER") {
1447 let value = value.trim().to_string();
1448 if !value.is_empty() {
1449 if let Ok(provider) = parse_provider(&value) {
1450 return provider;
1451 }
1452 }
1453 }
1454 if let Ok(Some(value)) = kv_getter("red.config.ai.default.provider") {
1456 let value = value.trim().to_string();
1457 if !value.is_empty() {
1458 if let Ok(provider) = parse_provider(&value) {
1459 return provider;
1460 }
1461 }
1462 }
1463 AiProvider::OpenAi
1464}
1465
1466pub fn resolve_default_model<F>(provider: &AiProvider, kv_getter: &F) -> String
1471where
1472 F: Fn(&str) -> crate::RedDBResult<Option<String>>,
1473{
1474 if let Ok(value) = std::env::var("REDDB_AI_MODEL") {
1476 let value = value.trim().to_string();
1477 if !value.is_empty() {
1478 return value;
1479 }
1480 }
1481 if let Ok(value) = std::env::var(provider.prompt_model_env_name()) {
1483 let value = value.trim().to_string();
1484 if !value.is_empty() {
1485 return value;
1486 }
1487 }
1488 if let Ok(Some(value)) = kv_getter("red.config.ai.default.model") {
1490 let value = value.trim().to_string();
1491 if !value.is_empty() {
1492 return value;
1493 }
1494 }
1495 provider.default_prompt_model().to_string()
1496}
1497
1498pub fn resolve_defaults_from_runtime(
1500 runtime: &crate::runtime::RedDBRuntime,
1501) -> (AiProvider, String) {
1502 use crate::application::ports::RuntimeEntityPort;
1503 let kv_getter = |key: &str| -> crate::RedDBResult<Option<String>> {
1504 match runtime.get_kv("red_config", key)? {
1505 Some((crate::storage::schema::Value::Text(s), _)) => Ok(Some(s.to_string())),
1506 _ => Ok(None),
1507 }
1508 };
1509 let provider = resolve_default_provider(&kv_getter);
1510 let model = resolve_default_model(&provider, &kv_getter);
1511 (provider, model)
1512}
1513
1514pub fn resolve_defaults_from_runtime_port<
1516 P: crate::application::ports::RuntimeEntityPort + ?Sized,
1517>(
1518 runtime: &P,
1519) -> (AiProvider, String) {
1520 let kv_getter = |key: &str| -> crate::RedDBResult<Option<String>> {
1521 match runtime.get_kv("red_config", key)? {
1522 Some((crate::storage::schema::Value::Text(s), _)) => Ok(Some(s.to_string())),
1523 _ => Ok(None),
1524 }
1525 };
1526 let provider = resolve_default_provider(&kv_getter);
1527 let model = resolve_default_model(&provider, &kv_getter);
1528 (provider, model)
1529}
1530
/// Resolve the API key for `provider`, optionally scoped to a credential alias.
///
/// Lookup precedence (first non-blank hit wins):
/// 1. keyless providers (no `requires_api_key`): optional env override, else `""`;
/// 2. with an alias: alias env var → vault secret path → `secret_ref`
///    indirection → legacy config key → error (no fallback to defaults);
/// 3. without an alias: provider env var → `default` vault path → `default`
///    `secret_ref` → `default` legacy key → short `"<provider>/default"` key → error.
///
/// `kv_getter` serves both vault (`red.secret.*`) and config (`red.config.*`)
/// keys; callers decide how those namespaces are backed.
///
/// # Errors
/// [`crate::RedDBError::Query`] naming the env var to set when no source
/// yields a key, plus any error propagated from `kv_getter`.
pub fn resolve_api_key<F>(
    provider: &AiProvider,
    credential_alias: Option<&str>,
    kv_getter: F,
) -> crate::RedDBResult<String>
where
    F: Fn(&str) -> crate::RedDBResult<Option<String>>,
{
    // Keyless providers still honor an explicit env override (e.g. an
    // authenticating proxy) but otherwise proceed with an empty key.
    if !provider.requires_api_key() {
        if let Ok(value) = std::env::var(provider.default_key_env_name()) {
            let value = value.trim().to_string();
            if !value.is_empty() {
                return Ok(value);
            }
        }
        return Ok(String::new());
    }

    if let Some(alias) = credential_alias.map(str::trim).filter(|a| !a.is_empty()) {
        // 1. Alias-scoped env var, e.g. REDDB_OPENAI_API_KEY_MY_ALIAS.
        if let Some(key) = resolve_key_from_env_alias(provider, alias) {
            return Ok(key);
        }
        // 2. Vault secret stored directly at the canonical path.
        if let Some(key) = kv_getter(&ai_api_secret_path(provider, alias))? {
            if !key.trim().is_empty() {
                return Ok(key);
            }
        }
        // 3. Config-level indirection: a `secret_ref` naming another key.
        if let Some(secret_ref) = kv_getter(&ai_api_secret_ref_config_key(provider, alias))? {
            if let Some(key) = kv_getter(secret_ref.trim())? {
                if !key.trim().is_empty() {
                    return Ok(key);
                }
            }
        }
        // 4. Legacy plaintext config key, kept for backward compatibility.
        let legacy_key = ai_api_legacy_config_key(provider, alias);
        if let Some(key) = kv_getter(&legacy_key)? {
            if !key.trim().is_empty() {
                return Ok(key);
            }
        }
        // An explicit alias that resolves nowhere is an error — deliberately
        // no fallback to the provider-wide defaults below.
        return Err(crate::RedDBError::Query(format!(
            "credential '{alias}' not found for {}. Set env {} or store it in the vault",
            provider.token(),
            provider.alias_key_env_name(alias)
        )));
    }

    // No alias: walk the same chain against the "default" credential slot.
    if let Ok(value) = std::env::var(provider.default_key_env_name()) {
        let value = value.trim().to_string();
        if !value.is_empty() {
            return Ok(value);
        }
    }

    if let Some(key) = kv_getter(&ai_api_secret_path(provider, "default"))? {
        if !key.trim().is_empty() {
            return Ok(key);
        }
    }
    if let Some(secret_ref) = kv_getter(&ai_api_secret_ref_config_key(provider, "default"))? {
        if let Some(key) = kv_getter(secret_ref.trim())? {
            if !key.trim().is_empty() {
                return Ok(key);
            }
        }
    }
    if let Some(key) = kv_getter(&ai_api_legacy_config_key(provider, "default"))? {
        if !key.trim().is_empty() {
            return Ok(key);
        }
    }

    // Oldest layout: "<provider>/default" without the structured prefix.
    let legacy_short_key = format!("{}/default", provider.token());
    if let Some(key) = kv_getter(&legacy_short_key)? {
        if !key.trim().is_empty() {
            return Ok(key);
        }
    }

    Err(crate::RedDBError::Query(format!(
        "missing {} API key. Set {} or provide credential alias",
        provider.token(),
        provider.default_key_env_name()
    )))
}
1628
1629pub fn ai_api_secret_path(provider: &AiProvider, alias: &str) -> String {
1630 format!(
1631 "red.secret.ai.{}.{}.api_key",
1632 provider.token(),
1633 normalize_credential_alias_path(alias)
1634 )
1635}
1636
1637pub fn ai_api_secret_ref_config_key(provider: &AiProvider, alias: &str) -> String {
1638 format!(
1639 "red.config.ai.{}.{}.secret_ref",
1640 provider.token(),
1641 normalize_credential_alias_path(alias)
1642 )
1643}
1644
1645pub fn ai_api_legacy_config_key(provider: &AiProvider, alias: &str) -> String {
1646 format!(
1647 "red.config.ai.{}.{}.key",
1648 provider.token(),
1649 normalize_credential_alias_path(alias)
1650 )
1651}
1652
/// Normalize a credential alias for use in KV/vault key paths: trimmed and
/// lowercased, with blank input mapping to "default".
fn normalize_credential_alias_path(alias: &str) -> String {
    match alias.trim() {
        "" => "default".to_string(),
        trimmed => trimmed.to_ascii_lowercase(),
    }
}
1661
1662fn resolve_key_from_env_alias(provider: &AiProvider, alias: &str) -> Option<String> {
1663 let env_name = provider.alias_key_env_name(alias);
1664 std::env::var(env_name)
1665 .ok()
1666 .map(|v| v.trim().to_string())
1667 .filter(|v| !v.is_empty())
1668}
1669
/// Normalize an alias into an env-var token: ASCII alphanumerics are
/// uppercased, every other character becomes `_`, runs of `_` collapse to
/// one, and leading/trailing `_` are removed (e.g. "my-alias" -> "MY_ALIAS").
///
/// Single pass instead of the previous repeated `contains`/`replace` loop,
/// which rescanned (and reallocated) the string once per collapse round.
fn normalize_alias_token(alias: &str) -> String {
    let mut out = String::with_capacity(alias.len());
    for character in alias.chars() {
        if character.is_ascii_alphanumeric() {
            out.push(character.to_ascii_uppercase());
        } else if !out.is_empty() && !out.ends_with('_') {
            // Collapse separator runs and drop leading separators as we go.
            out.push('_');
        }
    }
    // At most one trailing separator can remain; strip it.
    if out.ends_with('_') {
        out.pop();
    }
    out
}
1684
/// [`resolve_api_key`] wired to a live runtime: `red.secret.*` keys are read
/// from the vault, everything else from the `red_config` KV namespace.
pub fn resolve_api_key_from_runtime(
    provider: &AiProvider,
    credential_alias: Option<&str>,
    runtime: &crate::runtime::RedDBRuntime,
) -> crate::RedDBResult<String> {
    use crate::application::ports::RuntimeEntityPort;
    resolve_api_key(provider, credential_alias, |kv_key| {
        // Vault-backed secrets bypass the generic KV store entirely.
        if kv_key.starts_with("red.secret.") {
            return Ok(runtime.vault_kv_get(kv_key));
        }
        match runtime.get_kv("red_config", kv_key)? {
            Some((crate::storage::schema::Value::Text(secret), _)) => Ok(Some(secret.to_string())),
            // Non-text config values are treated as absent, not as an error.
            Some(_) => Ok(None),
            None => Ok(None),
        }
    })
}
1703
/// Compute embeddings via the HuggingFace Inference API
/// (`POST <api_base>/pipeline/feature-extraction/<model>`).
///
/// Each input string is sent as its own HTTP request — there is no batching,
/// so large input sets mean many sequential round-trips.
///
/// # Errors
/// Fails on transport errors, non-2xx statuses, unparseable JSON, or a
/// response body that is not a JSON array.
pub fn huggingface_embeddings(
    api_key: &str,
    model: &str,
    inputs: &[String],
    api_base: &str,
) -> crate::RedDBResult<OpenAiEmbeddingResponse> {
    let url = format!("{api_base}/pipeline/feature-extraction/{model}");
    let mut embeddings = Vec::with_capacity(inputs.len());

    for input in inputs {
        let payload = crate::serde_json::json!({ "inputs": input }).to_string_compact();
        let (status, body_str) = http_post_json(&url, api_key, &[], payload, 90)
            .map_err(|e| crate::RedDBError::Query(format!("HuggingFace API error: {e}")))?;
        if !(200..300).contains(&status) {
            return Err(crate::RedDBError::Query(format!(
                "HuggingFace API error (status {status}): {body_str}"
            )));
        }
        let body: JsonValue = crate::serde_json::from_str(&body_str).map_err(|e| {
            crate::RedDBError::Query(format!("HuggingFace response parse error: {e}"))
        })?;

        // NOTE(review): `filter_map` silently drops non-numeric elements, so
        // a nested (per-token) response would yield an empty vector instead
        // of an error — confirm the pipeline always returns a flat number
        // array for this call shape.
        let vector: Vec<f32> = match &body {
            JsonValue::Array(outer) => outer
                .iter()
                .filter_map(|v| v.as_f64().map(|n| n as f32))
                .collect(),
            _ => {
                return Err(crate::RedDBError::Query(
                    "unexpected HuggingFace embedding response format".to_string(),
                ))
            }
        };
        embeddings.push(vector);
    }

    Ok(OpenAiEmbeddingResponse {
        provider: "huggingface",
        model: model.to_string(),
        embeddings,
        // The feature-extraction pipeline reports no token usage.
        prompt_tokens: None,
        total_tokens: None,
    })
}
1754
/// Run a text-generation prompt via the HuggingFace Inference API
/// (`POST <api_base>/models/<model>`).
///
/// `max_tokens` maps to HF's `max_new_tokens` and defaults to 512 when not
/// given; `temperature` is only sent when provided.
///
/// # Errors
/// Fails on transport errors, non-2xx statuses, or unparseable JSON. A body
/// without `generated_text` yields an empty `output_text` rather than an
/// error.
pub fn huggingface_prompt(
    api_key: &str,
    model: &str,
    prompt: &str,
    temperature: Option<f32>,
    max_tokens: Option<usize>,
    api_base: &str,
) -> crate::RedDBResult<AiPromptResponse> {
    let url = format!("{api_base}/models/{model}");
    let mut params = Map::new();
    if let Some(t) = temperature {
        params.insert("temperature".into(), JsonValue::Number(t as f64));
    }
    params.insert(
        "max_new_tokens".into(),
        JsonValue::Number(max_tokens.unwrap_or(512) as f64),
    );
    let payload = crate::serde_json::json!({
        "inputs": prompt,
        "parameters": JsonValue::Object(params)
    });

    let (status, body_str) =
        http_post_json(&url, api_key, &[], payload.to_string_compact(), 120)
            .map_err(|e| crate::RedDBError::Query(format!("HuggingFace API error: {e}")))?;
    if !(200..300).contains(&status) {
        return Err(crate::RedDBError::Query(format!(
            "HuggingFace API error (status {status}): {body_str}"
        )));
    }
    let body: JsonValue = crate::serde_json::from_str(&body_str)
        .map_err(|e| crate::RedDBError::Query(format!("HuggingFace response parse error: {e}")))?;

    // HF may wrap the result in an array ([{"generated_text": ...}]) or
    // return a bare object; accept both shapes.
    let output_text = match &body {
        JsonValue::Array(arr) => arr
            .first()
            .and_then(|v| v.get("generated_text"))
            .and_then(JsonValue::as_str)
            .unwrap_or("")
            .to_string(),
        _ => body
            .get("generated_text")
            .and_then(JsonValue::as_str)
            .unwrap_or("")
            .to_string(),
    };

    Ok(AiPromptResponse {
        provider: "huggingface",
        model: model.to_string(),
        output_text,
        output_chunks: None,
        // The HF text-generation response carries no usage or stop metadata.
        prompt_tokens: None,
        completion_tokens: None,
        total_tokens: None,
        stop_reason: None,
    })
}
1814
1815pub fn local_embeddings(
1821 _model_id: &str,
1822 _texts: &[String],
1823) -> crate::RedDBResult<OpenAiEmbeddingResponse> {
1824 Err(crate::RedDBError::FeatureNotEnabled(
1825 "local model inference requires the 'local-models' feature flag. \
1826 Build with: cargo build --features local-models. \
1827 Alternatively, use 'ollama' provider with a local Ollama server."
1828 .to_string(),
1829 ))
1830}
1831
1832pub fn local_prompt(_model_id: &str, _prompt: &str) -> crate::RedDBResult<AiPromptResponse> {
1834 Err(crate::RedDBError::FeatureNotEnabled(
1835 "local model inference requires the 'local-models' feature flag. \
1836 Build with: cargo build --features local-models. \
1837 Alternatively, use 'ollama' provider with a local Ollama server."
1838 .to_string(),
1839 ))
1840}
1841
/// Extract the embedding input texts from a gRPC request payload.
///
/// Source precedence: a non-empty `source_query` (texts pulled from query
/// results) wins over `inputs` (non-empty array of non-empty strings),
/// which wins over a single non-empty `input` string.
///
/// # Errors
/// Any `inputs` element that is missing, non-string, or blank is an error,
/// as is a payload providing none of the three sources.
fn grpc_collect_embedding_inputs(
    runtime: &crate::runtime::RedDBRuntime,
    payload: &JsonValue,
) -> crate::RedDBResult<Vec<String>> {
    if let Some(source_query) = payload
        .get("source_query")
        .and_then(|v| v.as_str())
        .map(str::trim)
        .filter(|s| !s.is_empty())
    {
        return grpc_collect_inputs_from_source_query(runtime, payload, source_query);
    }

    if let Some(arr) = payload.get("inputs").and_then(|v| v.as_array()) {
        let mut out = Vec::with_capacity(arr.len());
        for (idx, v) in arr.iter().enumerate() {
            let text = v.as_str().ok_or_else(|| {
                crate::RedDBError::Query(format!("field 'inputs[{idx}]' must be a string"))
            })?;
            // Blank entries are rejected rather than skipped so callers learn
            // exactly which index is bad.
            if text.trim().is_empty() {
                return Err(crate::RedDBError::Query(format!(
                    "field 'inputs[{idx}]' cannot be empty"
                )));
            }
            out.push(text.to_string());
        }
        if out.is_empty() {
            return Err(crate::RedDBError::Query(
                "field 'inputs' must be a non-empty array of strings".to_string(),
            ));
        }
        return Ok(out);
    }

    if let Some(single) = payload
        .get("input")
        .and_then(|v| v.as_str())
        .map(str::trim)
        .filter(|s| !s.is_empty())
    {
        return Ok(vec![single.to_string()]);
    }

    Err(crate::RedDBError::Query(
        "provide either 'input', 'inputs', or 'source_query'".to_string(),
    ))
}
1901
/// Execute `source_query` against the runtime and harvest text inputs from
/// its result set.
///
/// `source_mode` controls extraction:
/// - `"row"` (the default): take only the column named by the required
///   `source_field` from each record;
/// - `"result"`: take every text-valued field of every record.
///
/// Non-text and blank values are silently skipped in both modes.
///
/// # Errors
/// Fails when the query errors, `source_field` is missing in `"row"` mode,
/// the mode is unknown, or no non-empty text values are collected.
fn grpc_collect_inputs_from_source_query(
    runtime: &crate::runtime::RedDBRuntime,
    payload: &JsonValue,
    source_query: &str,
) -> crate::RedDBResult<Vec<String>> {
    let result = runtime
        .execute_query(source_query)
        .map_err(|err| crate::RedDBError::Query(format!("source_query failed: {err}")))?;

    let source_mode = payload
        .get("source_mode")
        .and_then(|v| v.as_str())
        .map(str::trim)
        .filter(|s| !s.is_empty())
        .unwrap_or("row")
        .to_ascii_lowercase();

    let mut out: Vec<String> = Vec::new();
    match source_mode.as_str() {
        "row" => {
            let field = payload
                .get("source_field")
                .and_then(|v| v.as_str())
                .map(str::trim)
                .filter(|s| !s.is_empty())
                .ok_or_else(|| {
                    crate::RedDBError::Query(
                        "field 'source_field' is required when source_mode='row'".to_string(),
                    )
                })?;
            for rec in &result.result.records {
                for (key, value) in rec.iter_fields() {
                    if key.as_ref() == field {
                        if let crate::storage::schema::Value::Text(text) = value {
                            let trimmed = text.trim();
                            if !trimmed.is_empty() {
                                out.push(trimmed.to_string());
                            }
                        }
                    }
                }
            }
        }
        "result" => {
            // Flatten every text field from every record, in record order.
            for rec in &result.result.records {
                for (_, value) in rec.iter_fields() {
                    if let crate::storage::schema::Value::Text(text) = value {
                        let trimmed = text.trim();
                        if !trimmed.is_empty() {
                            out.push(trimmed.to_string());
                        }
                    }
                }
            }
        }
        other => {
            return Err(crate::RedDBError::Query(format!(
                "field 'source_mode' must be 'row' or 'result' (got '{other}')"
            )));
        }
    }

    if out.is_empty() {
        return Err(crate::RedDBError::Query(
            "source_query produced zero non-empty text inputs".to_string(),
        ));
    }
    Ok(out)
}
1971
/// gRPC entry point: compute embeddings per the JSON `payload` and return a
/// JSON object with `provider`, `model`, `embeddings`, and token usage when
/// the upstream reports it.
///
/// Payload fields: `provider` (defaults to "openai"), `model`, `credential`
/// (alias for API-key lookup), `dimensions`, plus the input fields handled
/// by `grpc_collect_embedding_inputs`.
///
/// # Errors
/// Anthropic (no embeddings API) and Local (requires the `local-models`
/// feature) are rejected explicitly rather than being rerouted to another
/// provider; input/model/key resolution and upstream failures also error.
pub fn grpc_embeddings(
    runtime: &crate::runtime::RedDBRuntime,
    payload: &JsonValue,
) -> crate::RedDBResult<JsonValue> {
    let provider_name = payload
        .get("provider")
        .and_then(|v| v.as_str())
        .map(str::trim)
        .filter(|s| !s.is_empty())
        .unwrap_or("openai");
    let provider = parse_provider(provider_name)?;
    // Reject providers that cannot serve embeddings up front, before any
    // input collection or key resolution work.
    match &provider {
        AiProvider::Anthropic => {
            return Err(crate::RedDBError::Query(
                "Anthropic does not offer an embeddings API. \
                 Re-issue the request against an OpenAI-compatible \
                 provider (openai, groq, ollama, openrouter, together, \
                 venice, deepseek), HuggingFace, or a custom base URL — \
                 RedDB does not silently route embeddings to a \
                 different provider than the one you named."
                    .to_string(),
            ));
        }
        AiProvider::Local => {
            return Err(crate::RedDBError::Query(
                "Local embeddings require the `local-models` feature \
                 flag at engine build time."
                    .to_string(),
            ));
        }
        _ => {}
    }

    let inputs: Vec<String> = grpc_collect_embedding_inputs(runtime, payload)?;

    // Model precedence: payload field → provider-specific env var →
    // legacy REDDB_OPENAI_EMBEDDING_MODEL env var → provider default.
    let model = payload
        .get("model")
        .and_then(|v| v.as_str())
        .map(str::trim)
        .filter(|s| !s.is_empty())
        .map(str::to_string)
        .or_else(|| {
            std::env::var(format!(
                "REDDB_{}_EMBEDDING_MODEL",
                provider.token().to_ascii_uppercase()
            ))
            .ok()
        })
        .or_else(|| std::env::var("REDDB_OPENAI_EMBEDDING_MODEL").ok())
        .filter(|v| !v.trim().is_empty())
        .unwrap_or_else(|| provider.default_embedding_model().to_string());

    let credential = payload
        .get("credential")
        .and_then(|v| v.as_str())
        .map(str::to_string);
    let api_key = resolve_api_key_from_runtime(&provider, credential.as_deref(), runtime)?;

    // Optional OpenAI-style dimension override; non-positive values ignored.
    let dimensions = payload
        .get("dimensions")
        .and_then(|v| v.as_i64())
        .and_then(|v| usize::try_from(v).ok())
        .filter(|v| *v > 0);

    let response = match &provider {
        AiProvider::HuggingFace => {
            huggingface_embeddings(&api_key, &model, &inputs, &provider.resolve_api_base())?
        }
        // Everything else goes through the async OpenAI-compatible client.
        _ => {
            let transport = crate::runtime::ai::transport::AiTransport::from_runtime(runtime);
            let request = OpenAiEmbeddingRequest {
                api_key,
                model,
                inputs,
                dimensions,
                api_base: provider.resolve_api_base(),
            };
            crate::runtime::ai::block_on_ai(async move {
                openai_embeddings_async(&transport, request).await
            })
            .and_then(|result| result)?
        }
    };

    // Re-encode the vectors as JSON arrays of numbers (f32 widened to f64).
    let embeddings_json: Vec<JsonValue> = response
        .embeddings
        .into_iter()
        .map(|vec| {
            JsonValue::Array(
                vec.into_iter()
                    .map(|f| JsonValue::Number(f as f64))
                    .collect(),
            )
        })
        .collect();

    let mut obj = Map::new();
    obj.insert(
        "provider".to_string(),
        JsonValue::String(response.provider.to_string()),
    );
    obj.insert("model".to_string(), JsonValue::String(response.model));
    obj.insert("embeddings".to_string(), JsonValue::Array(embeddings_json));
    // Usage fields are only emitted when the upstream reported them.
    if let Some(pt) = response.prompt_tokens {
        obj.insert("prompt_tokens".to_string(), JsonValue::Number(pt as f64));
    }
    if let Some(tt) = response.total_tokens {
        obj.insert("total_tokens".to_string(), JsonValue::Number(tt as f64));
    }
    Ok(JsonValue::Object(obj))
}
2108
2109pub fn grpc_prompt(
2111 _runtime: &crate::runtime::RedDBRuntime,
2112 _payload: &JsonValue,
2113) -> crate::RedDBResult<JsonValue> {
2114 Err(crate::RedDBError::FeatureNotEnabled(
2115 "AI prompt via gRPC requires HTTP endpoint; use POST /ai/prompt".to_string(),
2116 ))
2117}
2118
2119pub fn grpc_credentials(
2121 _runtime: &crate::runtime::RedDBRuntime,
2122 _payload: &JsonValue,
2123) -> crate::RedDBResult<JsonValue> {
2124 Err(crate::RedDBError::FeatureNotEnabled(
2125 "AI credentials via gRPC requires HTTP endpoint; use POST /ai/credentials".to_string(),
2126 ))
2127}