inferd_client/lib.rs
1//! Rust client for the inferd local-inference daemon.
2//!
3//! Wire protocol is NDJSON over Unix socket / Windows named pipe /
4//! loopback TCP. Spec is frozen as protocol v1; see the inferd
5//! repository's `docs/protocol-v1.md`.
6//!
7//! Two patterns for waiting on the daemon to come up; pick based on
8//! whether you need progress UX:
9//!
10//! - **Pattern A (passive)** — [`dial_and_wait_ready`] retries
11//! connect against the inference transport with exponential
12//! backoff. Successful connect is the ready signal because the
13//! daemon's inference socket only exists when the backend is ready
14//! (THREAT_MODEL F-13 in the upstream repo). This is the standard
15//! Postgres/Redis/etcd client shape.
16//! - **Pattern B (active)** — [`AdminClient`] subscribes to the
17//! admin socket and yields lifecycle events
18//! (`starting`/`loading_model`/`ready`/`restarting`/`draining`).
19//! Use this for installer GUIs, dashboards, or middleware that
20//! wants to display download progress during first-boot
21//! bootstrap.
22//!
23//! ## Quickstart (v1)
24//!
25//! ```no_run
26//! use inferd_client::{Client, Request, Message, Role, Response};
27//! use tokio_stream::StreamExt;
28//!
29//! # async fn run() -> Result<(), Box<dyn std::error::Error>> {
30//! let mut client = inferd_client::dial_and_wait_ready(
31//! std::time::Duration::from_secs(30),
32//! || Client::dial_tcp("127.0.0.1:47321"),
33//! )
34//! .await?;
35//!
36//! let mut stream = client.generate(Request {
37//! id: "demo-1".into(),
38//! messages: vec![Message {
39//! role: Role::User,
40//! content: "hello".into(),
41//! }],
42//! ..Default::default()
43//! })
44//! .await?;
45//!
46//! while let Some(frame) = stream.next().await {
47//! match frame? {
48//! Response::Token { content, .. } => print!("{content}"),
49//! Response::Done { stop_reason, backend, .. } => {
50//! println!("\n[done; backend={backend}, stop={stop_reason:?}]");
51//! }
52//! Response::Error { code, message, .. } => {
53//! eprintln!("[error {code:?}: {message}]");
54//! }
55//! Response::Status { .. } => {}
56//! }
57//! }
58//! # Ok(())
59//! # }
60//! ```
61//!
62//! ## Quickstart (v2 — typed content blocks, attachments, tools)
63//!
64//! v2 lives on a *separate* socket from v1 per ADR 0015. Connect with
65//! [`ClientV2`] (e.g. `ClientV2::dial_tcp`, as below) instead of [`Client`],
66//! and use the v2 wire types (`RequestV2`, `ContentBlock`, …).
67//!
68//! ```no_run
69//! use inferd_client::{ClientV2, RequestV2, MessageV2, RoleV2, ContentBlock, ResponseV2, ResponseBlock};
70//! use tokio_stream::StreamExt;
71//!
72//! # async fn run() -> Result<(), Box<dyn std::error::Error>> {
73//! let mut client = inferd_client::dial_and_wait_ready(
74//! std::time::Duration::from_secs(30),
75//! || ClientV2::dial_tcp("127.0.0.1:47322"),
76//! )
77//! .await?;
78//!
79//! let mut stream = client.generate(RequestV2 {
80//! id: "demo-1".into(),
81//! messages: vec![MessageV2 {
82//! role: RoleV2::User,
83//! content: vec![ContentBlock::Text { text: "hello".into() }],
84//! }],
85//! ..Default::default()
86//! })
87//! .await?;
88//!
89//! while let Some(frame) = stream.next().await {
90//! match frame? {
91//! ResponseV2::Frame { block: ResponseBlock::Text { delta }, .. } => print!("{delta}"),
92//! ResponseV2::Frame { block: ResponseBlock::Thinking { .. }, .. } => {}
93//! ResponseV2::Frame { block: ResponseBlock::ToolUse { name, .. }, .. } => {
94//! println!("\n[tool_use: {name}]");
95//! }
96//! ResponseV2::Done { stop_reason, backend, .. } => {
97//! println!("\n[done; backend={backend}, stop={stop_reason:?}]");
98//! }
99//! ResponseV2::Error { code, message, .. } => {
100//! eprintln!("[error {code:?}: {message}]");
101//! }
102//! }
103//! }
104//! # Ok(())
105//! # }
106//! ```
107//!
108//! ## Quickstart (embed — single-frame request/response)
109//!
110//! Embed lives on a *third* socket, separate from v1 and v2, per ADR
111//! 0017. Connect with [`EmbedClient`] (e.g. `EmbedClient::dial_tcp`, as
112//! below) and use the embed wire types (`EmbedRequest`, `EmbedResponse`,
113//! `EmbedTask`, …). The call is a single round-trip — no streaming,
114//! since an embedding is a complete vector.
115//!
116//! ```no_run
117//! use inferd_client::{EmbedClient, EmbedRequest, EmbedResponse, EmbedTask};
118//!
119//! # async fn run() -> Result<(), Box<dyn std::error::Error>> {
120//! let mut client = inferd_client::dial_and_wait_ready(
121//! std::time::Duration::from_secs(30),
122//! || EmbedClient::dial_tcp("127.0.0.1:47323"),
123//! )
124//! .await?;
125//!
126//! let resp = client.embed(EmbedRequest {
127//! id: "demo-1".into(),
128//! input: vec!["the quick brown fox".into()],
129//! dimensions: Some(256),
130//! task: Some(EmbedTask::RetrievalDocument),
131//! })
132//! .await?;
133//!
134//! match resp {
135//! EmbedResponse::Embeddings { embeddings, dimensions, .. } => {
136//! println!("got {} vectors of dim {dimensions}", embeddings.len());
137//! }
138//! EmbedResponse::Error { code, message, .. } => {
139//! eprintln!("[embed error {code:?}: {message}]");
140//! }
141//! }
142//! # Ok(())
143//! # }
144//! ```
145
146#![forbid(unsafe_code)]
147#![warn(missing_docs, rust_2018_idioms)]
148
149mod admin;
150mod client;
151mod embed_client;
152mod v2_client;
153mod wait;
154
155pub use admin::{AdminClient, AdminEvent};
156pub use client::{Client, ClientError, FrameStream};
157pub use embed_client::{EmbedClient, default_embed_addr};
158pub use v2_client::{ClientV2, FrameStreamV2, default_v2_addr};
159pub use wait::{WaitError, default_admin_addr, dial_and_wait_ready, is_transient_dial_error};
160
161/// Re-exports from `inferd-proto` so consumers don't need a separate
162/// `inferd-proto` dep for the wire types. The proto crate IS the
163/// version-pin contract for protocol compatibility — `inferd-client
164/// 0.2` always uses `inferd-proto 0.2`.
165pub use inferd_proto::{
166 ErrorCode, ImageTokenBudget, MAX_FRAME_BYTES, Message, ProtoError, Request, Resolved, Response,
167 Role, StopReason, Usage, VALID_IMAGE_TOKEN_BUDGETS,
168};
169
170/// Re-exports of the v2 wire types per ADR 0015. v2 is shipped as
171/// part of `inferd-client 0.2` so consumers building against v2 can
172/// reach the proto types without a separate `inferd-proto` dep.
173pub use inferd_proto::v2::{
174 Attachment, ContentBlock, ErrorCodeV2, MessageV2, RequestV2, ResolvedV2, ResponseBlock,
175 ResponseV2, RoleV2, StopReasonV2, Tool, ToolCallId, ToolUseInput, UsageV2,
176};
177
178/// Re-exports of the embed wire types per ADR 0017. Embed lives on
179/// the *third* inferd socket (separate from v1 and v2); the
180/// proto types are re-exported here so consumers don't need a separate
181/// `inferd-proto` dep.
182pub use inferd_proto::embed::{
183 EmbedErrorCode, EmbedRequest, EmbedResolved, EmbedResponse, EmbedTask, EmbedUsage,
184};