use std::path::PathBuf;
use std::sync::atomic::AtomicU32;
use std::sync::Arc;
use anyhow::Result;
/// Builds the embedder selected by the `TRUSTY_EMBEDDER` environment variable.
///
/// An unset (or unreadable) variable is treated as the empty string. Values are
/// checked in this order:
///
/// 1. `candle` — only when compiled with the `candle` feature: a `CandleEmbedder`
///    constructed on a blocking task.
/// 2. empty, `auto`, `stdio` — a `LazyEmbedderHandle` around the located
///    `trusty-embedderd` binary, exposed through `LazySlotEmbedderAdapter`.
/// 3. `in-process`, `local` — the result of `build_in_process_embedder`.
/// 4. `http://…` / `https://…` — a `RemoteEmbedderClient` for that address.
/// 5. `unix:<path>` — a `UdsEmbedderClient` for the socket path after the prefix.
///
/// # Returns
/// The embedder, plus the value of `LazyEmbedderHandle::app_pid_slot` for the
/// sidecar mode. Every other mode returns `None` in the second position.
///
/// # Errors
/// Fails when the candle init task cannot be joined or the candle constructor
/// fails, when the sidecar binary cannot be located, when the in-process
/// embedder fails to initialise, or when the variable holds an unrecognised value.
pub(super) async fn build_embedder() -> Result<(
    std::sync::Arc<dyn crate::core::Embedder>,
    Option<Arc<AtomicU32>>,
)> {
    use crate::service::embedder_supervisor::{
        locate_embedderd_binary, LazyEmbedderHandle, SupervisorConfig,
    };

    let trusty_embedder_env = std::env::var("TRUSTY_EMBEDDER").unwrap_or_default();

    #[cfg(feature = "candle")]
    {
        if trusty_embedder_env == "candle" {
            // The constructor is synchronous, so it runs on the blocking pool.
            let init_task =
                tokio::task::spawn_blocking(crate::service::candle_embedder::CandleEmbedder::new);
            let init_result = init_task
                .await
                .map_err(|e| anyhow::anyhow!("candle embedder init task panicked: {e}"))?;
            let candle = init_result?;
            let dim = candle.dimension();
            tracing::info!("embedder initialized: model=all-MiniLM-L6-v2 dim={dim} backend=candle");
            return Ok((std::sync::Arc::new(candle), None));
        }
    }

    let other = trusty_embedder_env.as_str();

    // Default mode: stdio sidecar behind a lazy handle.
    if matches!(other, "" | "auto" | "stdio") {
        let binary = match locate_embedderd_binary() {
            Ok(found) => found,
            Err(e) => anyhow::bail!(
                "{e}\n\n\
                 ERROR: trusty-embedderd binary not found on PATH.\n\
                 \n\
                 trusty-search v0.13+ requires trusty-embedderd to be installed alongside it.\n\
                 \n\
                 Install it with:\n\
                 \x20 cargo install trusty-embedderd --locked\n\
                 \n\
                 Or set TRUSTY_EMBEDDERD_BIN to an absolute path:\n\
                 \x20 export TRUSTY_EMBEDDERD_BIN=/path/to/trusty-embedderd\n\
                 \n\
                 If you need to run without the sidecar (tests, debugging), use:\n\
                 \x20 TRUSTY_EMBEDDER=in-process trusty-search start"
            ),
        };
        let config = SupervisorConfig::from_env();
        tracing::info!(
            "embedder mode: stdio-sidecar lazy (binary={}, idle_shutdown_secs={})",
            binary.display(),
            config.idle_shutdown_secs,
        );
        let handle = Arc::new(LazyEmbedderHandle::new(binary, config));
        // Grab the pid slot before the handle is moved into the adapter.
        let pid_slot = handle.app_pid_slot();
        return Ok((Arc::new(LazySlotEmbedderAdapter { handle }), Some(pid_slot)));
    }

    // Explicit opt-in to the in-process embedder.
    if matches!(other, "in-process" | "local") {
        tracing::info!("embedder mode: in-process (override via TRUSTY_EMBEDDER=in-process)");
        let embedder = build_in_process_embedder().await?;
        return Ok((embedder, None));
    }

    // Remote embedder over HTTP(S): the whole value is the address.
    if other.starts_with("http://") || other.starts_with("https://") {
        tracing::info!("embedder mode: remote http ({})", other);
        let client = trusty_common::embedder_client::RemoteEmbedderClient::new(other.to_string());
        let adapter = RemoteEmbedderAdapter {
            client: EmbedderClientKind::Http(client),
        };
        return Ok((Arc::new(adapter), None));
    }

    // Remote embedder over a Unix domain socket: everything after `unix:` is the path.
    if let Some(socket_path) = other.strip_prefix("unix:") {
        let sock = PathBuf::from(socket_path);
        tracing::info!("embedder mode: remote uds ({})", sock.display());
        let client = trusty_common::embedder_client::UdsEmbedderClient::new(sock);
        return Ok((Arc::new(UdsEmbedderAdapter { client }), None));
    }

    anyhow::bail!(
        "invalid TRUSTY_EMBEDDER value: {other:?}. \
         Expected: unset (default stdio sidecar), 'auto', 'stdio', 'in-process', \
         'http://...', or 'unix:/path/to/socket'"
    )
}
/// Constructs the in-process `FastEmbedder` and returns it as a trait object.
///
/// After construction it logs the embedding dimension and execution provider,
/// then calls `tune_batch_size_for_provider` with that provider.
///
/// # Errors
/// Returns an error prefixed with `FastEmbedder init failed:` when the
/// constructor fails. The same text is logged at error level first.
async fn build_in_process_embedder() -> Result<Arc<dyn crate::core::Embedder>> {
    let embedder = crate::core::FastEmbedder::new().await.map_err(|e| {
        tracing::error!("FastEmbedder init failed: {e:#}");
        // Use the alternate form here as well: wrapping via `anyhow!` keeps only
        // the rendered text, so plain `{e}` would drop the cause chain that the
        // log line above includes.
        anyhow::anyhow!("FastEmbedder init failed: {e:#}")
    })?;
    // Fully-qualified call pins the trait method explicitly.
    let dim = <crate::core::FastEmbedder as crate::core::Embedder>::dimension(&embedder);
    let provider = embedder.provider();
    // Human-readable suffix appended to the log line; empty for plain CPU.
    let metal_hint = match provider {
        trusty_common::embedder::ExecutionProvider::CoreML => " (Metal GPU + ANE + CPU)",
        trusty_common::embedder::ExecutionProvider::CoreMLAne => " (Neural Engine + CPU)",
        trusty_common::embedder::ExecutionProvider::Cuda => " (CUDA GPU)",
        trusty_common::embedder::ExecutionProvider::Cpu => "",
    };
    tracing::info!(
        "embedder initialized: model=AllMiniLML6V2(Q) dim={dim} provider={provider}{metal_hint}"
    );
    tune_batch_size_for_provider(provider);
    Ok(Arc::new(embedder))
}
/// Transport-specific client wrapped by `RemoteEmbedderAdapter`.
///
/// Only an HTTP transport is defined here.
enum EmbedderClientKind {
/// Remote embedder reached through a `RemoteEmbedderClient`.
Http(trusty_common::embedder_client::RemoteEmbedderClient),
}
/// Adapter that exposes a remote embedder client through `crate::core::Embedder`.
struct RemoteEmbedderAdapter {
/// The underlying client, tagged by transport.
client: EmbedderClientKind,
}
#[async_trait::async_trait]
impl crate::core::Embedder for RemoteEmbedderAdapter {
    /// Embeds one text by sending it to the remote client as a one-element batch.
    ///
    /// # Errors
    /// Fails when the client call errors or when the response contains no vector.
    async fn embed(&self, text: &str) -> anyhow::Result<Vec<f32>> {
        use trusty_common::embedder_client::EmbedderClient as _;

        // The enum has a single variant, so this pattern is irrefutable.
        let EmbedderClientKind::Http(http) = &self.client;
        let request = vec![text.to_owned()];
        let mut vectors = http
            .embed_batch(request)
            .await
            .map_err(|e| anyhow::anyhow!("remote embed failed: {e}"))?;

        // Take the last returned vector; an empty response is an error.
        match vectors.pop() {
            Some(vector) => Ok(vector),
            None => Err(anyhow::anyhow!("remote embedder returned no vector")),
        }
    }

    /// Embeds every text in `texts` with one call to the remote client.
    ///
    /// # Errors
    /// Fails when the client call errors.
    async fn embed_batch(&self, texts: &[&str]) -> anyhow::Result<Vec<Vec<f32>>> {
        use trusty_common::embedder_client::EmbedderClient as _;

        let EmbedderClientKind::Http(http) = &self.client;
        // The client takes owned strings, so copy each borrowed text.
        let request: Vec<String> = texts.iter().map(|text| text.to_string()).collect();
        let response = http.embed_batch(request).await;
        response.map_err(|e| anyhow::anyhow!("remote embed_batch failed: {e}"))
    }

    /// Returns the shared `EMBED_DIM` constant; the remote side is not queried.
    fn dimension(&self) -> usize {
        trusty_common::embedder::EMBED_DIM
    }

    /// Returns whatever `resolve_expected_provider` reports; the remote side is not queried.
    fn provider(&self) -> trusty_common::embedder::ExecutionProvider {
        trusty_common::embedder::resolve_expected_provider()
    }
}
/// Adapter that exposes a `UdsEmbedderClient` through `crate::core::Embedder`.
pub(super) struct UdsEmbedderAdapter {
/// Client used for every embed request made through this adapter.
pub(super) client: trusty_common::embedder_client::UdsEmbedderClient,
}
#[async_trait::async_trait]
impl crate::core::Embedder for UdsEmbedderAdapter {
    /// Embeds one text by sending it to the UDS client as a one-element batch.
    ///
    /// # Errors
    /// Fails when the client call errors or when the response contains no vector.
    async fn embed(&self, text: &str) -> anyhow::Result<Vec<f32>> {
        use trusty_common::embedder_client::EmbedderClient as _;

        let request = vec![text.to_owned()];
        let outcome = self.client.embed_batch(request).await;
        let mut vectors = outcome.map_err(|e| anyhow::anyhow!("uds embed failed: {e}"))?;

        // Take the last returned vector; an empty response is an error.
        match vectors.pop() {
            Some(vector) => Ok(vector),
            None => Err(anyhow::anyhow!("uds embedder returned no vector")),
        }
    }

    /// Embeds every text in `texts` with one call to the UDS client.
    ///
    /// # Errors
    /// Fails when the client call errors.
    async fn embed_batch(&self, texts: &[&str]) -> anyhow::Result<Vec<Vec<f32>>> {
        use trusty_common::embedder_client::EmbedderClient as _;

        // The client takes owned strings, so copy each borrowed text.
        let request: Vec<String> = texts.iter().map(|text| text.to_string()).collect();
        let outcome = self.client.embed_batch(request).await;
        outcome.map_err(|e| anyhow::anyhow!("uds embed_batch failed: {e}"))
    }

    /// Returns the shared `EMBED_DIM` constant; the socket peer is not queried.
    fn dimension(&self) -> usize {
        trusty_common::embedder::EMBED_DIM
    }

    /// Returns whatever `resolve_expected_provider` reports; the socket peer is not queried.
    fn provider(&self) -> trusty_common::embedder::ExecutionProvider {
        trusty_common::embedder::resolve_expected_provider()
    }
}
/// Adapter that exposes a shared `LazyEmbedderHandle` through `crate::core::Embedder`.
pub(super) struct LazySlotEmbedderAdapter {
/// Shared handle; embed requests are routed through its `embed_via` method.
pub(super) handle: Arc<crate::service::embedder_supervisor::LazyEmbedderHandle>,
}
#[async_trait::async_trait]
impl crate::core::Embedder for LazySlotEmbedderAdapter {
    /// Embeds one text by routing a one-element batch through the handle's `embed_via`.
    ///
    /// # Errors
    /// Fails when `embed_via` errors or when the response contains no vector.
    async fn embed(&self, text: &str) -> anyhow::Result<Vec<f32>> {
        // Owned copy so the request can be moved into the future handed to the handle.
        let request = vec![text.to_owned()];
        let outcome = self
            .handle
            .embed_via(|client| async move { client.embed_batch(request).await })
            .await;
        let mut vectors = outcome.map_err(|e| anyhow::anyhow!("lazy-stdio embed failed: {e}"))?;

        // Take the last returned vector; an empty response is an error.
        match vectors.pop() {
            Some(vector) => Ok(vector),
            None => Err(anyhow::anyhow!("lazy-stdio embedder returned no vector")),
        }
    }

    /// Embeds every text in `texts` with one request routed through `embed_via`.
    ///
    /// # Errors
    /// Fails when `embed_via` errors.
    async fn embed_batch(&self, texts: &[&str]) -> anyhow::Result<Vec<Vec<f32>>> {
        // Owned copies so the batch can be moved into the future handed to the handle.
        let request: Vec<String> = texts.iter().map(|text| text.to_string()).collect();
        let outcome = self
            .handle
            .embed_via(|client| async move { client.embed_batch(request).await })
            .await;
        outcome.map_err(|e| anyhow::anyhow!("lazy-stdio embed_batch failed: {e}"))
    }

    /// Returns the shared `EMBED_DIM` constant; the sidecar is not queried.
    fn dimension(&self) -> usize {
        trusty_common::embedder::EMBED_DIM
    }

    /// Returns whatever `resolve_expected_provider` reports; the sidecar is not queried.
    fn provider(&self) -> trusty_common::embedder::ExecutionProvider {
        trusty_common::embedder::resolve_expected_provider()
    }
}
/// Adjusts the `TRUSTY_MAX_BATCH_SIZE` environment variable for the given provider.
///
/// - `CoreML` / `CoreMLAne`: logs the value of `resolve_coreml_batch_size` and
///   changes nothing.
/// - `Cuda`: raises `TRUSTY_MAX_BATCH_SIZE` to 512 unless
///   `TRUSTY_MAX_BATCH_SIZE_EXPLICIT=1` is set or the current value is already
///   at least 512. A missing or unparsable current value counts as 128.
/// - Any other provider: no effect.
pub(super) fn tune_batch_size_for_provider(provider: trusty_common::embedder::ExecutionProvider) {
    const GPU_BATCH_DEFAULT: usize = 512;

    match provider {
        trusty_common::embedder::ExecutionProvider::CoreML
        | trusty_common::embedder::ExecutionProvider::CoreMLAne => {
            let coreml_bs = crate::core::resolve_coreml_batch_size();
            tracing::info!(
                "gpu_batch_tuning: provider={provider} → using TRUSTY_COREML_BATCH_SIZE={coreml_bs} for \
                 indexing batches (CoreML EP allocates per-batch buffers in the unified-memory pool)"
            );
            return;
        }
        // Only CUDA continues to the env-var adjustment below.
        trusty_common::embedder::ExecutionProvider::Cuda => {}
        _ => return,
    }

    // An explicit opt-out keeps whatever batch size the user configured.
    let user_pinned = matches!(
        std::env::var("TRUSTY_MAX_BATCH_SIZE_EXPLICIT").as_deref(),
        Ok("1")
    );
    if user_pinned {
        tracing::info!(
            "gpu_batch_tuning: TRUSTY_MAX_BATCH_SIZE_EXPLICIT=1 set, leaving batch size unchanged"
        );
        return;
    }

    let current = match std::env::var("TRUSTY_MAX_BATCH_SIZE") {
        Ok(raw) => raw.parse::<usize>().unwrap_or(128),
        Err(_) => 128,
    };

    // Never lower a value that is already at or above the GPU default.
    if current < GPU_BATCH_DEFAULT {
        // NOTE(review): `set_var` mutates process-global state. Confirm no other
        // thread reads or writes the environment while this runs.
        unsafe {
            std::env::set_var("TRUSTY_MAX_BATCH_SIZE", GPU_BATCH_DEFAULT.to_string());
        }
        tracing::info!(
            "gpu_batch_tuning: provider={provider} → TRUSTY_MAX_BATCH_SIZE={GPU_BATCH_DEFAULT} (was {current}); \
             set TRUSTY_MAX_BATCH_SIZE_EXPLICIT=1 to keep your value"
        );
    }
}