1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
//! Rust client for the inferd local-inference daemon.
//!
//! The wire protocol is NDJSON over a Unix socket, a Windows named
//! pipe, or loopback TCP. The spec is frozen as protocol v1; see
//! `docs/protocol-v1.md` in the inferd repository.
//!
//! Two patterns for waiting on the daemon to come up; pick based on
//! whether you need progress UX:
//!
//! - **Pattern A (passive)** — [`dial_and_wait_ready`] retries the
//!   connect against the inference transport with exponential
//!   backoff. A successful connect is the ready signal because the
//!   daemon's inference socket only exists when the backend is ready
//!   (THREAT_MODEL F-13 in the upstream repo). This is the same
//!   shape as standard Postgres/Redis/etcd clients.
//! - **Pattern B (active)** — [`AdminClient`] subscribes to the
//!   admin socket and yields lifecycle events
//!   (`starting`/`loading_model`/`ready`/`restarting`/`draining`).
//!   Use this for installer GUIs, dashboards, or middleware that
//!   wants to display download progress during first-boot
//!   bootstrap.
//!
//! ## Quickstart (v1)
//!
//! ```no_run
//! use inferd_client::{Client, Request, Message, Role, Response};
//! use tokio_stream::StreamExt;
//!
//! # async fn run() -> Result<(), Box<dyn std::error::Error>> {
//! let mut client = inferd_client::dial_and_wait_ready(
//!     std::time::Duration::from_secs(30),
//!     || Client::dial_tcp("127.0.0.1:47321"),
//! )
//! .await?;
//!
//! let mut stream = client.generate(Request {
//!     id: "demo-1".into(),
//!     messages: vec![Message {
//!         role: Role::User,
//!         content: "hello".into(),
//!     }],
//!     ..Default::default()
//! })
//! .await?;
//!
//! while let Some(frame) = stream.next().await {
//!     match frame? {
//!         Response::Token { content, .. } => print!("{content}"),
//!         Response::Done { stop_reason, backend, .. } => {
//!             println!("\n[done; backend={backend}, stop={stop_reason:?}]");
//!         }
//!         Response::Error { code, message, .. } => {
//!             eprintln!("[error {code:?}: {message}]");
//!         }
//!         Response::Status { .. } => {}
//!     }
//! }
//! # Ok(())
//! # }
//! ```
//!
//! ## Quickstart (v2 — typed content blocks, attachments, tools)
//!
//! v2 lives on a *separate* socket from v1 per ADR 0015. Dial it
//! through [`ClientV2`] (e.g. `ClientV2::dial_tcp`, as in the example
//! below) rather than through the v1 [`Client`], and use the v2 wire
//! types (`RequestV2`, `ContentBlock`, …).
//!
//! ```no_run
//! use inferd_client::{ClientV2, RequestV2, MessageV2, RoleV2, ContentBlock, ResponseV2, ResponseBlock};
//! use tokio_stream::StreamExt;
//!
//! # async fn run() -> Result<(), Box<dyn std::error::Error>> {
//! let mut client = inferd_client::dial_and_wait_ready(
//!     std::time::Duration::from_secs(30),
//!     || ClientV2::dial_tcp("127.0.0.1:47322"),
//! )
//! .await?;
//!
//! let mut stream = client.generate(RequestV2 {
//!     id: "demo-1".into(),
//!     messages: vec![MessageV2 {
//!         role: RoleV2::User,
//!         content: vec![ContentBlock::Text { text: "hello".into() }],
//!     }],
//!     ..Default::default()
//! })
//! .await?;
//!
//! while let Some(frame) = stream.next().await {
//!     match frame? {
//!         ResponseV2::Frame { block: ResponseBlock::Text { delta }, .. } => print!("{delta}"),
//!         ResponseV2::Frame { block: ResponseBlock::Thinking { .. }, .. } => {}
//!         ResponseV2::Frame { block: ResponseBlock::ToolUse { name, .. }, .. } => {
//!             println!("\n[tool_use: {name}]");
//!         }
//!         ResponseV2::Done { stop_reason, backend, .. } => {
//!             println!("\n[done; backend={backend}, stop={stop_reason:?}]");
//!         }
//!         ResponseV2::Error { code, message, .. } => {
//!             eprintln!("[error {code:?}: {message}]");
//!         }
//!     }
//! }
//! # Ok(())
//! # }
//! ```
//!
//! ## Quickstart (embed — single-frame request/response)
//!
//! Embed lives on a *third* socket, separate from v1 and v2, per ADR
//! 0017. Dial it through [`EmbedClient`] (e.g.
//! `EmbedClient::dial_tcp`, as in the example below) and use the
//! embed wire types (`EmbedRequest`, `EmbedResponse`, `EmbedTask`,
//! …). The call is a single round-trip with no streaming, since an
//! embedding is a complete vector.
//!
//! ```no_run
//! use inferd_client::{EmbedClient, EmbedRequest, EmbedResponse, EmbedTask};
//!
//! # async fn run() -> Result<(), Box<dyn std::error::Error>> {
//! let mut client = inferd_client::dial_and_wait_ready(
//!     std::time::Duration::from_secs(30),
//!     || EmbedClient::dial_tcp("127.0.0.1:47323"),
//! )
//! .await?;
//!
//! let resp = client.embed(EmbedRequest {
//!     id: "demo-1".into(),
//!     input: vec!["the quick brown fox".into()],
//!     dimensions: Some(256),
//!     task: Some(EmbedTask::RetrievalDocument),
//! })
//! .await?;
//!
//! match resp {
//!     EmbedResponse::Embeddings { embeddings, dimensions, .. } => {
//!         println!("got {} vectors of dim {dimensions}", embeddings.len());
//!     }
//!     EmbedResponse::Error { code, message, .. } => {
//!         eprintln!("[embed error {code:?}: {message}]");
//!     }
//! }
//! # Ok(())
//! # }
//! ```
pub use ;
pub use ;
pub use ;
pub use ;
pub use ;
/// Re-exports from `inferd-proto` so consumers don't need a separate
/// `inferd-proto` dep for the wire types. The proto crate IS the
/// version-pin contract for protocol compatibility — `inferd-client
/// 0.2` always uses `inferd-proto 0.2`.
pub use ;
/// Re-exports of the v2 wire types per ADR 0015. v2 is shipped as
/// part of `inferd-client 0.2` so consumers building against v2 can
/// reach the proto types without a separate `inferd-proto` dep.
pub use ;
/// Re-exports of the embed wire types per ADR 0017. Embed lives on
/// the *third* inferd socket (separate from v1 and v2); the
/// proto types are re-exported here so consumers don't need a separate
/// `inferd-proto` dep.
pub use ;