Skip to main content

inferd_client/
lib.rs

1//! Rust client for the inferd local-inference daemon.
2//!
3//! Wire protocol is NDJSON over Unix socket / Windows named pipe /
4//! loopback TCP. Spec is frozen as protocol v1; see the inferd
5//! repository's `docs/protocol-v1.md`.
6//!
7//! Two patterns for waiting on the daemon to come up; pick based on
8//! whether you need progress UX:
9//!
10//! - **Pattern A (passive)** — [`dial_and_wait_ready`] retries the
11//!   connect against the inference transport with exponential
12//!   backoff. A successful connect is the ready signal because the
13//!   daemon's inference socket only exists when the backend is ready
14//!   (THREAT_MODEL F-13 in the upstream repo). This is the standard
15//!   Postgres/Redis/etcd client shape.
16//! - **Pattern B (active)** — [`AdminClient`] subscribes to the
17//!   admin socket and yields lifecycle events
18//!   (`starting`/`loading_model`/`ready`/`restarting`/`draining`).
19//!   Use this for installer GUIs, dashboards, or middleware that
20//!   wants to display download progress during first-boot
21//!   bootstrap.
22//!
23//! ## Quickstart (v1)
24//!
25//! ```no_run
26//! use inferd_client::{Client, Request, Message, Role, Response};
27//! use tokio_stream::StreamExt;
28//!
29//! # async fn run() -> Result<(), Box<dyn std::error::Error>> {
30//! let mut client = inferd_client::dial_and_wait_ready(
31//!     std::time::Duration::from_secs(30),
32//!     || Client::dial_tcp("127.0.0.1:47321"),
33//! )
34//! .await?;
35//!
36//! let mut stream = client.generate(Request {
37//!     id: "demo-1".into(),
38//!     messages: vec![Message {
39//!         role: Role::User,
40//!         content: "hello".into(),
41//!     }],
42//!     ..Default::default()
43//! })
44//! .await?;
45//!
46//! while let Some(frame) = stream.next().await {
47//!     match frame? {
48//!         Response::Token { content, .. } => print!("{content}"),
49//!         Response::Done { stop_reason, backend, .. } => {
50//!             println!("\n[done; backend={backend}, stop={stop_reason:?}]");
51//!         }
52//!         Response::Error { code, message, .. } => {
53//!             eprintln!("[error {code:?}: {message}]");
54//!         }
55//!         Response::Status { .. } => {}
56//!     }
57//! }
58//! # Ok(())
59//! # }
60//! ```
61//!
62//! ## Quickstart (v2 — typed content blocks, attachments, tools)
63//!
64//! v2 lives on a *separate* socket from v1 per ADR 0015. Dial it with
65//! [`ClientV2`] (e.g. `ClientV2::dial_tcp`, shown below) rather than
66//! [`Client`], and use the v2 wire types (`RequestV2`, `ContentBlock`, …).
67//!
68//! ```no_run
69//! use inferd_client::{ClientV2, RequestV2, MessageV2, RoleV2, ContentBlock, ResponseV2, ResponseBlock};
70//! use tokio_stream::StreamExt;
71//!
72//! # async fn run() -> Result<(), Box<dyn std::error::Error>> {
73//! let mut client = inferd_client::dial_and_wait_ready(
74//!     std::time::Duration::from_secs(30),
75//!     || ClientV2::dial_tcp("127.0.0.1:47322"),
76//! )
77//! .await?;
78//!
79//! let mut stream = client.generate(RequestV2 {
80//!     id: "demo-1".into(),
81//!     messages: vec![MessageV2 {
82//!         role: RoleV2::User,
83//!         content: vec![ContentBlock::Text { text: "hello".into() }],
84//!     }],
85//!     ..Default::default()
86//! })
87//! .await?;
88//!
89//! while let Some(frame) = stream.next().await {
90//!     match frame? {
91//!         ResponseV2::Frame { block: ResponseBlock::Text { delta }, .. } => print!("{delta}"),
92//!         ResponseV2::Frame { block: ResponseBlock::Thinking { .. }, .. } => {}
93//!         ResponseV2::Frame { block: ResponseBlock::ToolUse { name, .. }, .. } => {
94//!             println!("\n[tool_use: {name}]");
95//!         }
96//!         ResponseV2::Done { stop_reason, backend, .. } => {
97//!             println!("\n[done; backend={backend}, stop={stop_reason:?}]");
98//!         }
99//!         ResponseV2::Error { code, message, .. } => {
100//!             eprintln!("[error {code:?}: {message}]");
101//!         }
102//!     }
103//! }
104//! # Ok(())
105//! # }
106//! ```
107//!
108//! ## Quickstart (embed — single-frame request/response)
109//!
110//! Embed lives on a *third* socket separate from v1 and v2 per ADR
111//! 0017. Dial it with [`EmbedClient`] (e.g. `EmbedClient::dial_tcp`,
112//! shown below) and use the embed wire types (`EmbedRequest`,
113//! `EmbedResponse`, `EmbedTask`, …). The call is a single round-trip —
114//! no streaming, since an embedding is a complete vector.
115//!
116//! ```no_run
117//! use inferd_client::{EmbedClient, EmbedRequest, EmbedResponse, EmbedTask};
118//!
119//! # async fn run() -> Result<(), Box<dyn std::error::Error>> {
120//! let mut client = inferd_client::dial_and_wait_ready(
121//!     std::time::Duration::from_secs(30),
122//!     || EmbedClient::dial_tcp("127.0.0.1:47323"),
123//! )
124//! .await?;
125//!
126//! let resp = client.embed(EmbedRequest {
127//!     id: "demo-1".into(),
128//!     input: vec!["the quick brown fox".into()],
129//!     dimensions: Some(256),
130//!     task: Some(EmbedTask::RetrievalDocument),
131//! })
132//! .await?;
133//!
134//! match resp {
135//!     EmbedResponse::Embeddings { embeddings, dimensions, .. } => {
136//!         println!("got {} vectors of dim {dimensions}", embeddings.len());
137//!     }
138//!     EmbedResponse::Error { code, message, .. } => {
139//!         eprintln!("[embed error {code:?}: {message}]");
140//!     }
141//! }
142//! # Ok(())
143//! # }
144//! ```
145
146#![forbid(unsafe_code)]
147#![warn(missing_docs, rust_2018_idioms)]
148
// Implementation modules. None is declared `pub`; the items callers need
// reach them through the crate-root `pub use` lines, so the internal
// module layout is not part of the public API.
149mod admin;
150mod client;
151mod embed_client;
152mod v2_client;
153mod wait;
154
// Crate-root re-exports of the local modules' public items: the admin
// client and its events, the v1/v2/embed clients with their frame-stream
// and default-address helpers, and the wait-for-ready helpers.
155pub use admin::{AdminClient, AdminEvent};
156pub use client::{Client, ClientError, FrameStream};
157pub use embed_client::{EmbedClient, default_embed_addr};
158pub use v2_client::{ClientV2, FrameStreamV2, default_v2_addr};
159pub use wait::{WaitError, default_admin_addr, dial_and_wait_ready, is_transient_dial_error};
160
161/// Re-exports from `inferd-proto` so consumers don't need a separate
162/// `inferd-proto` dep for the wire types. The proto crate IS the
163/// version-pin contract for protocol compatibility — `inferd-client
164/// 0.2` always uses `inferd-proto 0.2`.
165pub use inferd_proto::{
166    ErrorCode, ImageTokenBudget, MAX_FRAME_BYTES, Message, ProtoError, Request, Resolved, Response,
167    Role, StopReason, Usage, VALID_IMAGE_TOKEN_BUDGETS,
168};
169
170/// Re-exports of the v2 wire types per ADR 0015. v2 is shipped as
171/// part of `inferd-client 0.2` so consumers building against v2 can
172/// reach the proto types without a separate `inferd-proto` dep.
173pub use inferd_proto::v2::{
174    Attachment, ContentBlock, ErrorCodeV2, MessageV2, RequestV2, ResolvedV2, ResponseBlock,
175    ResponseV2, RoleV2, StopReasonV2, Tool, ToolCallId, ToolUseInput, UsageV2,
176};
177
178/// Re-exports of the embed wire types per ADR 0017. Embed lives on
179/// the *third* inferd socket (separate from v1 and v2); the
180/// proto types are re-exported here so consumers don't need a separate
181/// `inferd-proto` dep.
182pub use inferd_proto::embed::{
183    EmbedErrorCode, EmbedRequest, EmbedResolved, EmbedResponse, EmbedTask, EmbedUsage,
184};