Skip to main content

trusty_common/
lib.rs

1//! Shared utility surface for trusty-* projects.
2//!
3//! Why: Port auto-detect, data-directory resolution, tracing init, NO_COLOR
4//! handling, and the OpenRouter chat-completions client appeared in both
5//! trusty-memory and trusty-search with subtle divergence. Centralising keeps
6//! them aligned and gives future trusty-* binaries a one-import surface.
7//!
8//! What: pure utility functions — no global state. Each subsystem is a free
9//! function or a small helper struct.
10//!
11//! Test: `cargo test -p trusty-common` covers port walking, data-dir creation,
12//! and the OpenRouter request shape (without hitting the network).
13//!
14//! # Test isolation: `TRUSTY_DATA_DIR_OVERRIDE`
15//!
16//! macOS's [`dirs::data_dir()`] resolves the application-support directory via
17//! `NSFileManager`, a native Cocoa API that completely ignores the `HOME` and
18//! `XDG_DATA_HOME` environment variables. This makes it impossible to redirect
//! data-directory access in tests using ordinary env-var tricks, because the
//! native lookup bypasses the process environment entirely.
21//!
22//! To work around this, [`resolve_data_dir`] checks the
23//! [`DATA_DIR_OVERRIDE_ENV`] (`TRUSTY_DATA_DIR_OVERRIDE`) environment variable
24//! before consulting `dirs::data_dir()`. When set, the variable's value is used
25//! as the base directory verbatim, and `dirs::data_dir()` is never called.
26//!
27//! **This escape hatch is intended for testing only.** Do not set it in
28//! production deployments; rely on the OS-standard data directory instead.
29
30use std::net::SocketAddr;
31use std::path::{Path, PathBuf};
32
33pub mod chat;
34pub mod claude_config;
35pub mod project_discovery;
36
37/// Bounded in-memory ring buffer of recent tracing log lines.
38///
39/// Why: trusty-* daemons expose a `/logs/tail` endpoint so operators can read
40/// recent logs over HTTP without file I/O or a daemon restart. The buffer and
41/// its `tracing_subscriber::Layer` live here so every daemon shares one impl.
42/// What: `LogBuffer` (thread-safe capped `VecDeque<String>`) plus
43/// `LogBufferLayer` (the tracing layer that feeds it).
44/// Test: `cargo test -p trusty-common log_buffer` covers capacity eviction,
45/// tail semantics, and layer capture.
46pub mod log_buffer;
47
48/// Process RSS / CPU sampling and data-directory sizing for daemon health.
49///
50/// Why: every trusty-* daemon's `/health` endpoint reports its own resident
51/// memory, CPU usage, and on-disk footprint; the sampling logic is identical
52/// across them so it lives here once.
53/// What: `SysMetrics` (per-process RSS + CPU sampler) and `dir_size_bytes`
54/// (recursive directory byte count).
55/// Test: `cargo test -p trusty-common sys_metrics`.
56pub mod sys_metrics;
57
58/// macOS LaunchAgent generation and lifecycle management. macOS-only —
59/// the module compiles to nothing on every other platform.
60#[cfg(target_os = "macos")]
61pub mod launchd;
62
63#[cfg(feature = "axum-server")]
64pub mod server;
65
66/// Shared JSON-RPC 2.0 / MCP primitives (formerly the `trusty-mcp-core` crate).
67///
68/// Why: Centralises `Request`/`Response`/`JsonRpcError` envelopes, the
69/// `initialize` response builder, an async stdio dispatch loop, and the
70/// OpenRPC `rpc.discover` helpers so every MCP server in the workspace
71/// imports the same types.
72/// What: Gated behind the `mcp` feature; pulls in no extra dependencies
73/// beyond `serde` / `tokio`, both of which are already required.
74/// Test: `cargo test -p trusty-common --features mcp` runs the module's
75/// own unit tests (envelope round-trips, stdio loop dispatch, OpenRPC
76/// builder shape).
77#[cfg(feature = "mcp")]
78pub mod mcp;
79
80/// General-purpose JSON-RPC client + transports (formerly the library half
81/// of the `trusty-rpc` crate).
82///
83/// Why: Both `trpc` (the CLI) and any future library consumer want one
84/// place that owns the JSON-RPC envelope construction, stdio-subprocess
85/// transport, HTTP transport, and pretty-printers.
86/// What: Gated behind the `rpc` feature; requires `uuid` for request id
87/// generation. The HTTP transport reuses the workspace `reqwest`.
88/// Test: `cargo test -p trusty-common --features rpc` runs the module's
89/// own unit tests (envelope extraction, pretty-print smoke tests).
90#[cfg(feature = "rpc")]
91pub mod rpc;
92
93/// Shared text-embedding abstraction (formerly the `trusty-embedder` crate).
94///
95/// Why: trusty-memory and trusty-search both ship near-identical `Embedder`
96/// traits and `FastEmbedder` implementations; centralising the surface here
97/// keeps them aligned and lets future consumers pick up embedding for free
98/// without a separate published crate.
99/// What: Gated behind the `embedder` feature. Exposes the `Embedder` trait,
100/// `FastEmbedder` (fastembed-rs, all-MiniLM-L6-v2, 384-d) with LRU caching
101/// and ORT warmup, and (under `embedder-test-support`) the `MockEmbedder`
102/// test double.
103/// Test: `cargo test -p trusty-common --features embedder,embedder-test-support`
104/// covers the mock embedder and ONNX-backed `#[ignore]`d integration tests.
105#[cfg(feature = "embedder")]
106pub mod embedder;
107
108/// Unified RPC client surface for the `trusty-embedderd` standalone process.
109///
110/// Why: absorbs both the former `trusty-embedder-client` HTTP crate (PR #163)
111/// and the former `embed_client` UDS module (PR #157) into a single unified
112/// module. Reduces workspace crate count and provides one trait (`EmbedderClient`)
113/// with three concrete implementations (InProcess, HTTP remote, UDS remote) so
114/// call sites are identical regardless of transport. The `embed-client` feature
115/// and `embed_client` module are retired by issue #164; use `embedder-client`
116/// and `trusty_common::embedder_client::UdsEmbedderClient` instead.
117/// What: Gated behind the `embedder-client` feature. Exposes the
118/// `EmbedderClient` trait, `InProcessEmbedderClient`, `RemoteEmbedderClient`
119/// (HTTP), `UdsEmbedderClient` (UDS), `EmbedRequest` / `EmbedResponse` wire
120/// types, and `EmbedderError`. The UDS impl uses `tokio::net::UnixStream`
121/// with newline-framed JSON-RPC 2.0 — no additional dependencies.
122/// Test: `cargo test -p trusty-common --features embedder-client` covers
123/// error-display, JSON round-trip, URL assembly, UDS wire types, and empty-
124/// batch short-circuits. ONNX-backed tests are in
125/// `trusty-embedderd/tests/bit_identical.rs` (`#[ignore]`).
126#[cfg(feature = "embedder-client")]
127pub mod embedder_client;
128
129/// Zero-dependency BM25 lexical index + code-aware tokenizer (issue #156).
130///
131/// Why: trusty-memory, trusty-search, and the per-palace
132/// `trusty-bm25-daemon` subprocess all want one shared BM25 implementation
133/// so the tokenizer's camelCase / PascalCase / alpha↔digit splits stay
134/// consistent across the workspace. Originally ported from open-mpm; now
135/// the single source of truth lives here.
136/// What: Gated behind the `bm25` feature. Adds no new dependencies — pure
137/// `std` + `tracing` (already required).
138/// Test: `cargo test -p trusty-common --features bm25`.
139#[cfg(feature = "bm25")]
140pub mod bm25;
141
142/// Reusable schema-migration kernel (issue #179).
143///
144/// Why: trusty-search, trusty-memory, and other long-lived stores have grown
145/// ad-hoc schema-migration loops that drift apart. Centralising the
146/// `SchemaVersion` newtype, the `Migration<S>` trait, and a `MigrationRunner`
147/// that applies pending steps in order (writing a stamp after each) collapses
148/// those into one shared kernel. The `file_stamp` helper covers the common
149/// "JSON sidecar in the store's data dir" stamp format; redb-stamp users get
150/// a documented recipe instead of a heavyweight dep.
151/// What: gated behind the `migrations` feature flag. Adds no new
152/// dependencies — pure `serde` + `serde_json` + `anyhow` + `tracing` which
153/// the crate already requires.
154/// Test: `cargo test -p trusty-common --features migrations` covers the
155/// runner ordering, crash resumption, write-stamp failure propagation, and
156/// the file-stamp round-trip / atomic-write behaviour.
157#[cfg(feature = "migrations")]
158pub mod migrations;
159
160/// UDS JSON-RPC client for the per-palace `trusty-bm25-daemon` subprocess
161/// (issue #156).
162///
163/// Why: trusty-memory needs a lexical-search lane without holding an
164/// in-process BM25 index. `Bm25Client` delegates to the per-palace daemon
165/// over `$TMPDIR/trusty-bm25-<palace>.sock`, matching the design of
166/// `EmbedClient` and `trusty-embed-daemon` (PR #157).
167/// What: Gated behind the `bm25-client` feature. Pure user of existing
168/// `tokio` / `serde_json` / `anyhow` workspace deps — adds no new
169/// dependencies.
170/// Test: `cargo test -p trusty-common --features bm25-client` covers
171/// request shape and path defaults; end-to-end coverage lives in
172/// `trusty-bm25-daemon/tests/`.
173#[cfg(feature = "bm25-client")]
174pub mod bm25_client;
175
176/// Symbol-graph engine (formerly the `trusty-symgraph` crate).
177///
178/// Why: All trusty-* tools that touch source code (open-mpm, trusty-search,
179/// trusty-analyze) want the same `EntityType` / `RawEntity` / `EdgeKind`
180/// data shapes and (for orchestrators) the same tree-sitter pipeline. Living
181/// here lets the workspace ship one tree-sitter `links =` slot instead of
182/// juggling two crates that both claim it.
183/// What: Gated behind two features. `symgraph` exposes only the contracts
184/// surface (`EntityType`, `RawEntity`, `EdgeKind`, `fact_hash_str`, tables)
185/// — no tree-sitter, no `links` conflict. `symgraph-parser` additionally
186/// pulls in tree-sitter and the full parse → registry → emit stack.
187/// `symgraph-server` enables the HTTP server frontend.
188/// Test: `cargo test -p trusty-common --features symgraph` exercises the
189/// contracts surface; `cargo test -p trusty-symgraph` covers the parser
190/// path through the thin re-export shim.
191#[cfg(feature = "symgraph")]
192pub mod symgraph;
193
194/// Memory Palace storage engine (formerly the `trusty-memory-core` crate).
195///
196/// Why: Centralises the Memory Palace data model (`Palace` / `Wing` /
197/// `Room` / `Drawer`), storage backends (usearch vector index + SQLite
198/// knowledge graph + chat-session log + payload store), retrieval handle,
199/// and the dream / decay / analytics / git-history surfaces so every
200/// trusty-* binary that talks to a palace reuses the same types. Absorbed
201/// into `trusty-common` (issue #5 phase 2d) so we ship one fewer published
202/// crate.
203/// What: Gated behind the `memory-core` feature because it pulls in heavy
204/// storage deps (`usearch`, `rusqlite`, `r2d2`, `git2`, `kuzu`). Enables
205/// the embedder surface automatically (memory-core → embedder).
206/// Test: `cargo test -p trusty-common --features memory-core` exercises
207/// the full surface.
208#[cfg(feature = "memory-core")]
209pub mod memory_core;
210
211/// Unified ticketing MCP server (formerly the `trusty-tickets` crate).
212///
213/// Why: Claude Code and the rest of the trusty-* suite need a single MCP
214/// surface that can talk to GitHub Issues, JIRA, and Linear without the
215/// caller needing to know which backend is configured. Absorbing into
216/// `trusty-common` reduces the workspace crate count and co-locates the
217/// HTTP client surface with the other protocol helpers.
218/// What: Gated behind the `tickets` feature. Exposes `tickets::api::*`
219/// (config, models, Backend trait, three concrete backends), `tickets::server`
220/// (MCP dispatch loop + `run_stdio`), and `tickets::tools` (the tool-list
221/// schema). Requires the `mcp` feature for the stdio loop.
222/// Test: `cargo test -p trusty-common --features tickets` runs the module's
223/// own unit tests (dispatch, tool-list counts, config parsing, serde
224/// round-trips). Live backend tests require env-var credentials.
225#[cfg(feature = "tickets")]
226pub mod tickets;
227
228/// Declarative CLI help system with "did you mean?" suggestions (issue #216).
229///
230/// Why: every standalone trusty-* binary used to render its `--help` and
231/// unknown-subcommand error output independently, so the formats drifted
232/// apart over time. Centralising the help model into one YAML schema, one
233/// canonical renderer, and one Jaro-Winkler suggester keeps the six binaries
234/// (search, memory, analyze, mpm-cli, tga, open-mpm) speaking with a single
235/// user-facing voice.
236/// What: gated behind the `cli-help` feature. Pulls in `serde_yaml`, `strsim`,
237/// and `indexmap`. Exposes `HelpConfig` / `CommandDef` / `FlagDef` / `Example`
238/// + `load_help` / `render_help` / `suggest`.
239/// Test: `cargo test -p trusty-common --features cli-help`.
240#[cfg(feature = "cli-help")]
241pub mod help;
242
243/// Unified monitor TUI for the trusty-search and trusty-memory daemons
244/// (formerly the `trusty-monitor-tui` crate).
245///
246/// Why: operators run both daemons and want one terminal surface that shows
247/// the health of both at a glance. Living here behind the `monitor-tui`
248/// feature flag matches the workspace's "one fewer published crate" direction
249/// (issue #31 companion) and keeps the dashboard logic unit-testable.
250/// What: gated behind the `monitor-tui` feature, which pulls in `ratatui` and
251/// `crossterm`. Exposes `monitor::run` (the entry point the `trusty-monitor`
252/// binary calls) plus the pure `dashboard` / `search_client` / `memory_client`
253/// submodules.
254/// Test: `cargo test -p trusty-common --features monitor-tui` covers the
255/// rendering, layout, and HTTP-client pieces.
256#[cfg(feature = "monitor-tui")]
257pub mod monitor;
258
259/// Throttled crates.io update-notification helper.
260///
261/// Why: User-facing CLIs should nudge operators when a newer release is
262/// available without adding perceptible latency. A shared implementation
263/// keeps the throttle, cache, opt-out, and User-Agent logic consistent across
264/// every consumer in the workspace.
265/// What: Gated behind the `update-check` feature. Exposes
266/// [`update::check_throttled`] (the main entry — reads a per-crate JSON cache
267/// under the OS cache dir, queries crates.io at most once per 24 h),
268/// [`update::check_crates_io`] (the raw network call), [`update::notice`]
269/// (formatted upgrade message), and [`update::UpdateInfo`] (the result type).
270/// All failures degrade to `None` — the check is best-effort and will not
271/// panic or stall a CLI.
272/// Opt-out: set `TRUSTY_NO_UPDATE_CHECK` or `CI` to any non-empty value.
273/// Test: `cargo test -p trusty-common --features update-check`.
274#[cfg(feature = "update-check")]
275pub mod update;
276
277/// Error-capture layer for the trusty-* consent-gated bug-reporting system
278/// (bug-reporting Phase 1, issue #479).
279///
280/// Why: Every trusty-* daemon encounters runtime errors that developers need
281///      to see but that must be captured locally and only filed to GitHub after
282///      explicit user consent. A shared capture layer in `trusty-common` means
283///      all daemons gain error capture without per-binary changes.
284/// What: Gated behind the `bug-capture` feature. Exposes:
285///      - [`error_capture::CapturedError`] — structured error record.
286///      - [`error_capture::ErrorStore`] — ring buffer + JSONL store.
287///      - [`error_capture::BugCaptureLayer`] — the tracing Layer.
288///      - [`error_capture::bug_capture_layer`] — convenience constructor.
289///      - [`error_capture::TRUSTY_NO_BUG_CAPTURE_ENV`] — opt-out env name.
290///      Additive: does not alter stderr logging. Opt-out via
291///      `TRUSTY_NO_BUG_CAPTURE=1`. New dep: `sha2` (already workspace-optional).
292/// Test: `cargo test -p trusty-common --features bug-capture`.
293#[cfg(feature = "bug-capture")]
294pub mod error_capture;
295
296pub use chat::{
297    ChatEvent, ChatProvider, LocalModelConfig, OllamaProvider, OpenRouterProvider, ToolCall,
298    ToolDef, auto_detect_local_provider,
299};
300
301use anyhow::{Context, Result, anyhow};
302use serde::{Deserialize, Serialize};
303use tokio::net::TcpListener;
304
305// ─── Port binding ─────────────────────────────────────────────────────────
306
307/// Bind to `addr`; if the port is in use, walk forward up to `max_attempts`
308/// ports and return the first listener that binds.
309///
310/// Why: Running multiple instances of a trusty-* daemon (or restarting before
311/// the kernel releases the prior socket) shouldn't produce a noisy failure —
312/// auto-incrementing gives a friendlier developer experience while still
313/// honouring the user's preferred starting port.
314/// What: returns the first successful `tokio::net::TcpListener`. Callers can
315/// inspect `local_addr()` to discover where it landed and report it however
316/// they prefer — this function does not perform any I/O on stdout/stderr.
317/// `max_attempts == 0` means "try `addr` exactly once".
318/// Test: `auto_port_walks_forward` binds a port, then calls this with the
319/// occupied port and confirms a different free port is returned.
320pub async fn bind_with_auto_port(addr: SocketAddr, max_attempts: u16) -> Result<TcpListener> {
321    use std::io::ErrorKind;
322    let mut current = addr;
323    for attempt in 0..=max_attempts {
324        match TcpListener::bind(current).await {
325            Ok(l) => return Ok(l),
326            Err(e) if e.kind() == ErrorKind::AddrInUse && attempt < max_attempts => {
327                let next_port = current.port().saturating_add(1);
328                if next_port == 0 {
329                    anyhow::bail!("ran out of ports while searching for free slot");
330                }
331                tracing::warn!("port {} in use, trying {}", current.port(), next_port);
332                current.set_port(next_port);
333            }
334            Err(e) => return Err(e.into()),
335        }
336    }
337    anyhow::bail!("could not find free port after {max_attempts} attempts")
338}
339
340// ─── Data directory ───────────────────────────────────────────────────────
341
/// Environment variable name for the data-directory test escape hatch.
///
/// Why: macOS's `dirs::data_dir()` delegates to `NSFileManager`, a native Cocoa
/// API that ignores `HOME` and `XDG_DATA_HOME`. Setting `HOME` in a test process
/// does **not** redirect `dirs::data_dir()` on macOS, making path isolation
/// impossible without a separate bypass. This constant names that bypass.
///
/// What: When `TRUSTY_DATA_DIR_OVERRIDE` is set in the environment,
/// [`resolve_data_dir`] uses its value as the base directory and skips the
/// `dirs::data_dir()` call entirely. The final path is
/// `${TRUSTY_DATA_DIR_OVERRIDE}/<app_name>`, identical in structure to the
/// normal OS-standard path.
///
/// Validation applied by [`resolve_data_dir`]: an empty or whitespace-only
/// value is ignored with a warning (platform resolution proceeds as if the
/// variable were unset); a relative value or a value equal to `/` is rejected
/// with an error.
///
/// **Intended for tests only.** Do not set this variable in production; it
/// bypasses the OS-standard application-data directory.
///
/// Test: All `resolve_data_dir` tests in this module set this var to a
/// temporary directory so they run identically on macOS, Linux, and Windows.
pub const DATA_DIR_OVERRIDE_ENV: &str = "TRUSTY_DATA_DIR_OVERRIDE";
361
362/// Validate and, if necessary, replace an unsafe data-root path.
363///
364/// Why: `dirs::data_dir()` and the HOME-relative fallback can return dangerous
365/// paths when the daemon environment is degenerate — e.g. `HOME="/"` on Linux
366/// yields `/.trusty-memory`, and `XDG_DATA_HOME="/"` yields `/trusty-memory`.
367/// Neither of those are literal `/`, but both scatter application data directly
368/// under the filesystem root. This pure helper applies post-resolution
369/// validation to any candidate path regardless of which branch produced it, and
370/// returns a known-safe fallback path if any guard fires. Being infallible
371/// (always returns a usable path) avoids adding an error return to the many
372/// existing `resolve_data_dir` call sites while still preventing root-scatter.
373///
374/// What: checks, in order:
375/// 1. `candidate` must be absolute. If not, falls back to
376///    `$TMPDIR/trusty-<app_name>` and emits `tracing::error!`.
377/// 2. `candidate` must not be exactly `/`. If so, falls back and logs error.
378/// 3. `candidate`'s parent must not be `/` unless `candidate` is a normal
379///    user-data path (guards against e.g. `/.trusty-memory` from `HOME=/`).
380///    Paths whose sole parent is `/` receive the safe-temp fallback.
381///
382/// The safe fallback is `std::env::temp_dir().join(format!("trusty-{app_name}"))`.
383/// This lets the daemon still start (and log a clear error) rather than
384/// crash-looping when the host environment is misconfigured.
385///
386/// Test: `sanitize_data_root_rejects_relative`, `sanitize_data_root_rejects_root`,
387/// `sanitize_data_root_rejects_bare_root_child`, `sanitize_data_root_passes_valid_path`.
388pub fn sanitize_data_root(candidate: PathBuf, app_name: &str) -> PathBuf {
389    let safe_fallback = || std::env::temp_dir().join(format!("trusty-{app_name}"));
390
391    if !candidate.is_absolute() {
392        tracing::error!(
393            path = %candidate.display(),
394            app = app_name,
395            "resolved data root is not absolute; \
396             falling back to temp dir to prevent CWD-relative palace creation. \
397             Check HOME and TRUSTY_DATA_DIR_OVERRIDE in the daemon environment."
398        );
399        return safe_fallback();
400    }
401
402    if candidate == Path::new("/") {
403        tracing::error!(
404            app = app_name,
405            "resolved data root is the filesystem root (/); \
406             falling back to temp dir. \
407             Check HOME and TRUSTY_DATA_DIR_OVERRIDE in the daemon environment."
408        );
409        return safe_fallback();
410    }
411
412    // Reject paths whose immediate parent is "/" — these arise when HOME="/"
413    // (Linux) produces `/.trusty-memory` or XDG_DATA_HOME="/" produces
414    // `/trusty-memory`. A well-formed user-data path always has at least two
415    // non-root ancestors (e.g. `/home/user/...` or `/Users/user/...`).
416    if candidate.parent() == Some(Path::new("/")) {
417        tracing::error!(
418            path = %candidate.display(),
419            app = app_name,
420            "resolved data root is a direct child of the filesystem root; \
421             this usually means HOME or XDG_DATA_HOME is set to '/'. \
422             Falling back to temp dir to prevent data scatter under /."
423        );
424        return safe_fallback();
425    }
426
427    candidate
428}
429
430/// Resolve `<data_dir>/<app_name>`, creating it if it doesn't exist.
431///
432/// Why: All trusty-* tools want a per-machine, per-app directory under the
433/// OS-standard data dir (`~/Library/Application Support/`, `~/.local/share/`,
434/// `%APPDATA%/`). If `dirs::data_dir()` is unavailable (rare — locked-down
435/// containers), falls back to `~/.<app_name>` so the tool still works.
436///
437/// The [`DATA_DIR_OVERRIDE_ENV`] (`TRUSTY_DATA_DIR_OVERRIDE`) environment
438/// variable provides a test escape hatch: when set to a *non-empty absolute
439/// path*, `dirs::data_dir()` is **never called** and the variable's value is
440/// used as the base directory instead. This is necessary because macOS's
441/// `dirs::data_dir()` calls `NSFileManager` — a native Cocoa API that
442/// resolves the application-support directory through the system rather than
443/// through the process environment — so setting `HOME` or `XDG_DATA_HOME` in
444/// a test process does not redirect it. `TRUSTY_DATA_DIR_OVERRIDE` is the
445/// only reliable cross-platform way to isolate test data paths. **It is
446/// intended for tests only; do not set it in production.**
447///
448/// Safety guards (fixes latent defect tracked in #503 / #504):
449/// - An **empty or whitespace-only** override is treated as unset: a
450///   `tracing::warn!` is emitted and normal platform-dir resolution proceeds.
451///   `std::fs::create_dir_all("")` silently returns `Ok(())` on macOS/Linux,
452///   so without this guard the empty-override case would return a relative
453///   `"<app_name>"` path that resolves under the daemon's CWD — `/` under
454///   launchd — and create palace directories at the filesystem root.
455/// - A **non-absolute** (relative) override is rejected with an error, because
456///   relative paths are daemon-CWD-dependent and therefore non-deterministic.
457/// - A resolved root equal to `/` from an explicit override is rejected with
458///   an error.
459/// - The FINAL resolved path (from any branch) is validated by
460///   [`sanitize_data_root`]: non-absolute, `/`, or a bare `/`-child path
461///   (e.g. from `HOME="/"`) falls back to a safe temp location rather than
462///   allowing the daemon to create palace dirs at the filesystem root.
463///
464/// What: returns the absolute path `${base}/<app_name>` (created if absent).
465/// Resolution order:
466/// 1. `$TRUSTY_DATA_DIR_OVERRIDE/<app_name>` — when the env var is set to a
467///    non-empty absolute path that is not `/`.
468/// 2. `$(dirs::data_dir())/<app_name>` — normal OS-standard path.
469/// 3. `~/.<app_name>` — fallback when `dirs::data_dir()` returns `None`.
470///
471/// In all cases, the result passes through [`sanitize_data_root`].
472///
473/// Test: `resolve_data_dir_creates_directory`, `resolve_data_dir_empty_override_uses_platform_dir`,
474/// `resolve_data_dir_whitespace_override_uses_platform_dir`,
475/// `resolve_data_dir_relative_override_errors`,
476/// `resolve_data_dir_root_override_errors`,
477/// `sanitize_data_root_rejects_relative`, `sanitize_data_root_rejects_root`,
478/// `sanitize_data_root_rejects_bare_root_child`, `sanitize_data_root_passes_valid_path`.
479pub fn resolve_data_dir(app_name: &str) -> Result<PathBuf> {
480    let base = match std::env::var(DATA_DIR_OVERRIDE_ENV) {
481        Ok(raw) if raw.trim().is_empty() => {
482            // Empty or whitespace-only override: treat as unset, warn operator.
483            tracing::warn!(
484                env = DATA_DIR_OVERRIDE_ENV,
485                "TRUSTY_DATA_DIR_OVERRIDE is set but empty; ignoring and using \
486                 the platform data directory instead. An empty override would \
487                 produce a relative path that resolves against the daemon's \
488                 working directory (/ under launchd), which is never correct."
489            );
490            dirs::data_dir()
491                .or_else(|| dirs::home_dir().map(|h| h.join(format!(".{app_name}"))))
492                .context("could not resolve data directory or home directory")?
493        }
494        Ok(raw) => {
495            let p = PathBuf::from(&raw);
496            if !p.is_absolute() {
497                anyhow::bail!(
498                    "TRUSTY_DATA_DIR_OVERRIDE={raw:?} is a relative path; only \
499                     absolute paths are accepted to prevent the data directory \
500                     from depending on the daemon's working directory"
501                );
502            }
503            if p == Path::new("/") {
504                anyhow::bail!(
505                    "TRUSTY_DATA_DIR_OVERRIDE={raw:?} resolves to the filesystem \
506                     root (/); refusing to create palace directories directly \
507                     under / as that would scatter data across the root filesystem"
508                );
509            }
510            p
511        }
512        Err(_) => dirs::data_dir()
513            .or_else(|| dirs::home_dir().map(|h| h.join(format!(".{app_name}"))))
514            .context("could not resolve data directory or home directory")?,
515    };
516    let dir = if base.ends_with(format!(".{app_name}")) {
517        base
518    } else {
519        base.join(app_name)
520    };
521    // Post-resolution validation: applies to every branch, including the
522    // platform-default path. Dangerous paths (non-absolute, /, bare /-child)
523    // are replaced with a safe temp fallback rather than returning an error,
524    // so misconfigured daemon environments degrade gracefully instead of
525    // crash-looping.
526    let dir = sanitize_data_root(dir, app_name);
527    std::fs::create_dir_all(&dir)
528        .with_context(|| format!("create data directory {}", dir.display()))?;
529    Ok(dir)
530}
531
532// ─── Daemon address file ──────────────────────────────────────────────────
533
/// Filename used inside each app's data directory to record the daemon's
/// bound HTTP address. Kept as a module-level constant so writers and readers
/// can't drift: `write_daemon_addr` and `read_daemon_addr` both join this
/// name onto the directory returned by `resolve_data_dir`.
const DAEMON_ADDR_FILENAME: &str = "http_addr";
538
539/// Write the daemon's bound HTTP address to the app's data directory.
540///
541/// Why: Both trusty-search and trusty-memory persist their bound `host:port`
542/// to disk so MCP clients (and follow-up CLI invocations) can discover where
543/// the daemon ended up after auto-port-walking. Centralising the path layout
544/// keeps the two projects in sync and prevents a third trusty-* daemon from
545/// inventing yet another location.
546/// What: writes `addr` verbatim (no trailing newline) to
547/// `{resolve_data_dir(app_name)}/http_addr`, creating the directory if it
548/// doesn't yet exist. Atomic-overwrite semantics aren't required — the file
549/// is rewritten on every daemon start.
550/// Test: `daemon_addr_round_trips` writes then reads under a stubbed HOME and
551/// confirms equality.
552pub fn write_daemon_addr(app_name: &str, addr: &str) -> Result<()> {
553    let dir = resolve_data_dir(app_name)?;
554    let path = dir.join(DAEMON_ADDR_FILENAME);
555    std::fs::write(&path, addr).with_context(|| format!("write daemon addr to {}", path.display()))
556}
557
558/// Read the daemon's HTTP address from the app's data directory.
559///
560/// Why: CLI commands and MCP clients need to discover the running daemon's
561/// bound port. Returning `Option` lets callers distinguish "daemon never
562/// started" (file absent) from "filesystem error" (permission denied, etc.)
563/// without resorting to string matching on error messages.
564/// What: reads `{resolve_data_dir(app_name)}/http_addr`, trims surrounding
565/// whitespace, and returns `Some(addr)`. Returns `Ok(None)` iff the file
566/// does not exist; any other I/O error propagates as `Err`.
567/// Test: `daemon_addr_round_trips` and `read_daemon_addr_missing_returns_none`.
568pub fn read_daemon_addr(app_name: &str) -> Result<Option<String>> {
569    let dir = resolve_data_dir(app_name)?;
570    let path = dir.join(DAEMON_ADDR_FILENAME);
571    match std::fs::read_to_string(&path) {
572        Ok(s) => Ok(Some(s.trim().to_string())),
573        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None),
574        Err(e) => Err(anyhow::Error::new(e))
575            .with_context(|| format!("read daemon addr from {}", path.display())),
576    }
577}
578
579// ─── Already-running guard ────────────────────────────────────────────────
580
581/// Issue a short-timeout `GET {base_url}{health_path}` and report whether it
582/// returns a 2xx response.
583///
584/// Why: every trusty-* daemon's "is one already running?" check follows the
585/// same shape — probe the recorded address for `/health` with a tight timeout
586/// so a dead daemon does not block the start command for the discovery
587/// timeout. Lifting the probe into one helper keeps the request/timeout
588/// configuration identical across `check_already_running` (file-based) and the
589/// trusty-mpm lock-file path (where the URL is derived from a TOML file).
590/// What: builds a `reqwest::Client` with a 1 s request timeout, issues the GET,
591/// returns `true` only when the response is HTTP 2xx. Any client-builder error
592/// or transport failure returns `false`.
593/// Test: covered indirectly via `check_already_running_*` and the three daemon
594/// integration paths.
595pub async fn probe_health(base_url: &str, health_path: &str) -> bool {
596    let probe = format!("{base_url}{health_path}");
597    let client = match reqwest::Client::builder()
598        .timeout(std::time::Duration::from_secs(1))
599        .build()
600    {
601        Ok(c) => c,
602        Err(_) => return false,
603    };
604    matches!(client.get(&probe).send().await, Ok(resp) if resp.status().is_success())
605}
606
607/// Probe whether an existing daemon recorded at `addr_file` is healthy and,
608/// if so, return its base URL so the caller can refuse to start a duplicate.
609///
610/// Why: every trusty-* daemon (search, memory, mpm) historically port-walked on
611/// boot. Invoking the `start` / `serve` command a second time silently spawned
612/// a second instance on the next free port — splitting traffic between two
613/// stores, doubling RSS, and confusing every client that resolves the address
614/// from disk. The CLI must read the recorded address, ask the live process for
615/// `/health`, and if both succeed report "already running" and exit 0 rather
616/// than racing a duplicate process against the port walker. A shared helper
617/// keeps the three daemons honest — drift here is the bug we are fixing.
618/// What: returns `Some("http://<addr>")` only when (a) `addr_file` exists and
619/// is readable, (b) its trimmed contents parse as a non-empty `host:port`, and
620/// (c) an HTTP `GET http://<addr><health_path>` returns a 2xx within ~1.5 s
621/// (1 s request timeout plus tokio scheduling slack). Returns `None` on every
622/// other outcome — missing file, unreadable contents, dead address, non-2xx
623/// response — so the caller treats that as "no live daemon, proceed".
624/// Side-effect (stale-file cleanup): when the file exists but the health probe
625/// fails (or the file is empty / malformed), the function best-effort deletes
626/// it via `std::fs::remove_file` so the next caller does not chase the same
627/// dead address. A delete failure is intentionally ignored.
628/// Test: `check_already_running_returns_none_when_file_missing`,
629/// `check_already_running_returns_none_when_file_empty`,
630/// `check_already_running_returns_none_when_address_dead`,
631/// `check_already_running_returns_url_when_health_ok`.
632pub async fn check_already_running(addr_file: &Path, health_path: &str) -> Option<String> {
633    let raw = match std::fs::read_to_string(addr_file) {
634        Ok(s) => s,
635        Err(_) => return None,
636    };
637    let addr = raw.trim();
638    if addr.is_empty() {
639        // Empty / whitespace-only file is treated as stale — best-effort delete.
640        let _ = std::fs::remove_file(addr_file);
641        return None;
642    }
643    let url = format!("http://{addr}");
644    if probe_health(&url, health_path).await {
645        Some(url)
646    } else {
647        // Stale file pointing at a dead address. Clear it so the next start
648        // attempt is not blocked by a probe against the dead URL.
649        let _ = std::fs::remove_file(addr_file);
650        None
651    }
652}
653
654// ─── CLI initialisation ───────────────────────────────────────────────────
655
656/// Initialise the global tracing subscriber.
657///
658/// Why: Every trusty-* binary wants the same verbosity ladder and the same
659/// `RUST_LOG` override semantics. Defining it once removes the boilerplate
660/// from every `main.rs`.
661/// What: `verbose_count` maps `0 → warn`, `1 → info`, `2 → debug`, `3+ →
662/// trace`. If `RUST_LOG` is set in the environment it wins. Logs go to
663/// stderr so stdout stays clean for MCP JSON-RPC.
664/// Test: side-effecting (global subscriber) — covered by integration with
665/// `cargo run -- -v status` in downstream crates.
666pub fn init_tracing(verbose_count: u8) {
667    let default_filter = match verbose_count {
668        0 => "warn",
669        1 => "info",
670        2 => "debug",
671        _ => "trace",
672    };
673    let filter = tracing_subscriber::EnvFilter::try_from_default_env()
674        .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_filter));
675    // try_init so callers that pre-install a subscriber don't panic.
676    let _ = tracing_subscriber::fmt()
677        .with_env_filter(filter)
678        .with_writer(std::io::stderr)
679        .with_target(false)
680        .try_init();
681}
682
683/// Initialise the global tracing subscriber and capture events into a
684/// [`log_buffer::LogBuffer`] so the daemon can serve recent logs over HTTP.
685///
686/// Why: daemons expose `GET /logs/tail`, which needs an in-memory ring of
687/// recent log lines. Routing capture through the subscriber means every
688/// existing `tracing::info!` / `warn!` call site is mirrored automatically —
689/// no second logging API to keep in sync. The stderr `fmt` layer is retained
690/// so operators still see live logs in the terminal / launchd log file.
691/// What: builds a `tracing_subscriber::registry` with two layers — the
692/// standard stderr `fmt` layer (same verbosity ladder + `RUST_LOG` override
693/// as [`init_tracing`]) and a [`log_buffer::LogBufferLayer`] feeding the
694/// returned [`log_buffer::LogBuffer`]. Uses `try_init`, so a process that has
695/// already installed a subscriber keeps it; the returned buffer is still
696/// valid (just empty) in that case.
697/// Test: `cargo test -p trusty-common log_buffer` covers the layer; the
698/// daemon `/logs/tail` integration tests cover the wired path end-to-end.
699#[must_use]
700pub fn init_tracing_with_buffer(verbose_count: u8, capacity: usize) -> log_buffer::LogBuffer {
701    use tracing_subscriber::Layer as _;
702    use tracing_subscriber::layer::SubscriberExt;
703    use tracing_subscriber::util::SubscriberInitExt;
704
705    let default_filter = match verbose_count {
706        0 => "warn",
707        1 => "info",
708        2 => "debug",
709        _ => "trace",
710    };
711    // Stderr filter follows the same verbosity ladder + `RUST_LOG` override as
712    // `init_tracing` so terminal output stays compact at the operator's chosen
713    // level.
714    let stderr_filter = tracing_subscriber::EnvFilter::try_from_default_env()
715        .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_filter));
716
717    // The log-buffer layer must capture activity even when the stderr filter
718    // is set to `warn` (the default for `trusty-search start` without `-v`).
719    // Operators reading `/logs/tail` expect to see info-level lifecycle events
720    // (file-watcher reindexes, startup scans). Without a separate filter the
721    // global stderr filter would suppress them before they reach the buffer.
722    // `RUST_LOG_BUFFER` lets ops widen or narrow the buffer independently of
723    // stderr; the default of `info` matches the activity feed's intent.
724    let buffer_filter = tracing_subscriber::EnvFilter::try_from_env("RUST_LOG_BUFFER")
725        .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info"));
726
727    let buffer = log_buffer::LogBuffer::new(capacity);
728    let fmt_layer = tracing_subscriber::fmt::layer()
729        .with_writer(std::io::stderr)
730        .with_target(false)
731        .with_filter(stderr_filter);
732    let buf_layer = log_buffer::LogBufferLayer::new(buffer.clone()).with_filter(buffer_filter);
733    // try_init so callers that pre-install a subscriber don't panic — the
734    // returned buffer simply stays empty in that (rare) case.
735    let _ = tracing_subscriber::registry()
736        .with(fmt_layer)
737        .with(buf_layer)
738        .try_init();
739    buffer
740}
741
/// Install the process-wide tracing subscriber with a
/// [`log_buffer::LogBuffer`] **and** a [`error_capture::BugCaptureLayer`],
/// all registered through a single `try_init` call.
///
/// Why: `tracing_subscriber::registry().try_init()` can only succeed once per
///      process, so a daemon that wants both the HTTP log-tail buffer
///      (issue #35) and Phase 1 bug capture has to compose every layer in one
///      go — a second `try_init` would be silently ignored. This is the
///      canonical start-up entry point for binaries that want both.
/// What: builds the stderr `fmt` layer (filtered by `RUST_LOG` when usable,
///      otherwise the `verbose_count` ladder `0 → warn`, `1 → info`,
///      `2 → debug`, `3+ → trace`), the `LogBufferLayer` (filtered by
///      `RUST_LOG_BUFFER` when usable, otherwise `info`), and the capture
///      layer produced by `error_capture::bug_capture_layer` for
///      `app_name` / `crate_version`. Returns `(LogBuffer, ErrorStore)` so the
///      caller can keep both handles in the daemon's `AppState`.
///      Capture goes to a JSONL file under `<dirs::data_dir()>/<app_name>/`
///      and an in-memory ring — nothing is written to stdout, so this is
///      MCP-safe. Honours `TRUSTY_NO_BUG_CAPTURE` for opt-out.
/// Test: `cargo test -p trusty-common --features bug-capture -- init_tracing_with_capture`.
#[cfg(feature = "bug-capture")]
#[must_use]
pub fn init_tracing_with_buffer_and_capture(
    verbose_count: u8,
    capacity: usize,
    app_name: &str,
    crate_version: impl Into<String>,
) -> (log_buffer::LogBuffer, error_capture::ErrorStore) {
    use tracing_subscriber::Layer as _;
    use tracing_subscriber::layer::SubscriberExt;
    use tracing_subscriber::util::SubscriberInitExt;

    let terminal_filter =
        tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| {
            tracing_subscriber::EnvFilter::new(match verbose_count {
                0 => "warn",
                1 => "info",
                2 => "debug",
                _ => "trace",
            })
        });
    let ring_filter = tracing_subscriber::EnvFilter::try_from_env("RUST_LOG_BUFFER")
        .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info"));

    let ring = log_buffer::LogBuffer::new(capacity);
    let (bug_layer, error_store) = error_capture::bug_capture_layer(
        app_name,
        error_capture::DEFAULT_CAPTURE_CAPACITY,
        crate_version,
    );

    let terminal_layer = tracing_subscriber::fmt::layer()
        .with_writer(std::io::stderr)
        .with_target(false)
        .with_filter(terminal_filter);
    let ring_layer = log_buffer::LogBufferLayer::new(ring.clone()).with_filter(ring_filter);

    // One registry, one `try_init`: later `try_init` calls elsewhere become
    // no-ops instead of competing with this installation.
    let subscriber = tracing_subscriber::registry()
        .with(terminal_layer)
        .with(ring_layer)
        .with(bug_layer);
    let _ = subscriber.try_init();
    (ring, error_store)
}
803
804/// Disable coloured terminal output when requested or when stdout is not a TTY.
805///
806/// Why: Pipe-friendly output is mandatory for scripting (`trusty-search list
807/// | jq …`). `NO_COLOR` / `TERM=dumb` are the canonical signals; passing
808/// `--no-color` should override too.
809/// What: calls `colored::control::set_override(false)` when the caller asks
810/// for it or when the standard heuristics indicate no colour.
811/// Test: side-effecting global; trivially covered by manual `NO_COLOR=1 cargo
812/// run -- list`.
813pub fn maybe_disable_color(no_color: bool) {
814    let env_says_no =
815        std::env::var("NO_COLOR").is_ok() || std::env::var("TERM").as_deref() == Ok("dumb");
816    if no_color || env_says_no {
817        colored::control::set_override(false);
818    }
819}
820
821// ─── OpenRouter ───────────────────────────────────────────────────────────
822
/// Chat-completions endpoint the OpenRouter helpers in this file POST to.
const OPENROUTER_URL: &str = "https://openrouter.ai/api/v1/chat/completions";
/// Value sent as the `HTTP-Referer` header on those requests.
const HTTP_REFERER: &str = "https://github.com/bobmatnyc/trusty-common";
/// Value sent as the `X-Title` header on those requests.
const X_TITLE: &str = "trusty-common";
/// Connect timeout, in seconds, configured on the OpenRouter `reqwest` client.
const OPENROUTER_CONNECT_TIMEOUT_SECS: u64 = 10;
/// Overall request timeout, in seconds, configured on the OpenRouter `reqwest` client.
const OPENROUTER_REQUEST_TIMEOUT_SECS: u64 = 120; // chat completions can take 60–90s
828
/// OpenAI-compatible chat message.
///
/// Why: Both trusty-memory's `chat` subcommand and trusty-search's `/chat`
/// endpoint speak the OpenRouter format. Sharing the struct keeps them in
/// step (and lets callers compose chat histories without re-defining types).
/// Tool-use additions (`tool_call_id`, `tool_calls`) follow the OpenAI
/// function-calling shape: assistant messages set `tool_calls` when the model
/// requests tool invocations; subsequent `role: "tool"` messages echo the
/// matching `tool_call_id` with the tool's result in `content`.
/// What: `role` is one of `"system" | "user" | "assistant" | "tool"`.
/// `content` is the message text. `tool_call_id` is the id of the tool call
/// this message is replying to (only set when `role == "tool"`). `tool_calls`
/// is the raw OpenAI `tool_calls` array on an assistant message that asked
/// to invoke tools — kept as `serde_json::Value` so we don't drop any fields
/// the upstream may add.
/// Test: serde round-trip in `chat_message_round_trips`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatMessage {
    /// Speaker of the message: `"system"`, `"user"`, `"assistant"` or `"tool"`.
    /// Stored as a plain string; the type itself does not validate the value.
    pub role: String,
    /// Message text (for `role == "tool"`, the tool's result).
    pub content: String,
    /// Id of the tool call this message answers. Omitted from the serialized
    /// form when `None`, and defaults to `None` when absent on input.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub tool_call_id: Option<String>,
    /// Raw `tool_calls` array of an assistant message that requested tool
    /// invocations. Omitted from the serialized form when `None`, and defaults
    /// to `None` when absent on input.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub tool_calls: Option<Vec<serde_json::Value>>,
}
854
/// JSON request body for the OpenRouter chat-completions POST.
///
/// Borrows the model name and message slice from the caller so building the
/// body does not copy the chat history.
#[derive(Debug, Serialize)]
struct ChatRequest<'a> {
    /// Model identifier forwarded verbatim to OpenRouter.
    model: &'a str,
    /// Conversation so far, in order.
    messages: &'a [ChatMessage],
    /// `false` for the one-shot helper, `true` for the streaming helper.
    stream: bool,
}
861
/// Subset of the non-streaming chat-completions response that
/// `openrouter_chat` decodes; only `choices` is read.
#[derive(Debug, Deserialize)]
struct ChatResponse {
    /// Completion candidates; `openrouter_chat` uses the first one.
    choices: Vec<Choice>,
}
866
/// One entry of the response's `choices` array.
#[derive(Debug, Deserialize)]
struct Choice {
    /// The message carried by this choice.
    message: ResponseMessage,
}
871
/// Message payload inside a response [`Choice`]; only the text is decoded.
#[derive(Debug, Deserialize)]
struct ResponseMessage {
    /// Message text. `#[serde(default)]` makes a missing `content` field
    /// decode as the empty string instead of failing.
    #[serde(default)]
    content: String,
}
877
878/// Send a chat completion request to OpenRouter and return the assistant's
879/// message content.
880///
881/// Why: A one-shot, non-streaming chat call is the common-case helper — used
882/// by trusty-memory's `chat` CLI and trusty-search's `/chat` endpoint.
883/// What: POSTs `{model, messages, stream: false}` to OpenRouter with bearer
884/// auth, decodes the response, and returns `choices[0].message.content`.
885/// Errors propagate as anyhow with HTTP status context.
886/// Test: error paths covered by `openrouter_propagates_http_errors` (uses a
887/// blackhole base URL — no real call).
888#[deprecated(since = "0.3.1", note = "Use OpenRouterProvider::chat_stream instead")]
889pub async fn openrouter_chat(
890    api_key: &str,
891    model: &str,
892    messages: Vec<ChatMessage>,
893) -> Result<String> {
894    if api_key.is_empty() {
895        return Err(anyhow!("openrouter api key is empty"));
896    }
897    let client = reqwest::Client::builder()
898        .connect_timeout(std::time::Duration::from_secs(
899            OPENROUTER_CONNECT_TIMEOUT_SECS,
900        ))
901        .timeout(std::time::Duration::from_secs(
902            OPENROUTER_REQUEST_TIMEOUT_SECS,
903        ))
904        .build()
905        .context("build reqwest client for openrouter_chat")?;
906    let body = ChatRequest {
907        model,
908        messages: &messages,
909        stream: false,
910    };
911    let resp = client
912        .post(OPENROUTER_URL)
913        .bearer_auth(api_key)
914        .header("HTTP-Referer", HTTP_REFERER)
915        .header("X-Title", X_TITLE)
916        .json(&body)
917        .send()
918        .await
919        .context("POST openrouter chat completions")?;
920    let status = resp.status();
921    if !status.is_success() {
922        let text = resp.text().await.unwrap_or_default();
923        return Err(anyhow!("openrouter HTTP {status}: {text}"));
924    }
925    let payload: ChatResponse = resp.json().await.context("decode openrouter response")?;
926    payload
927        .choices
928        .into_iter()
929        .next()
930        .map(|c| c.message.content)
931        .ok_or_else(|| anyhow!("openrouter returned no choices"))
932}
933
934/// Stream chat-completion deltas from OpenRouter through a tokio mpsc channel.
935///
936/// Why: `chat` UIs want incremental tokens for a responsive feel; the
937/// streaming endpoint emits SSE `data:` frames with delta content.
938/// What: POSTs the request with `stream: true`, parses each SSE `data:` line
939/// as a JSON object, extracts `choices[0].delta.content`, and sends each
940/// non-empty chunk to `tx`. The function returns when the stream terminates
941/// (either by `[DONE]` sentinel or by upstream EOF).
942/// Test: integration-only (no offline mock); covered manually via the
943/// trusty-search `/chat` endpoint that re-uses this helper.
944#[deprecated(since = "0.3.1", note = "Use OpenRouterProvider::chat_stream instead")]
945pub async fn openrouter_chat_stream(
946    api_key: &str,
947    model: &str,
948    messages: Vec<ChatMessage>,
949    tx: tokio::sync::mpsc::Sender<String>,
950) -> Result<()> {
951    use futures_util::StreamExt;
952
953    if api_key.is_empty() {
954        return Err(anyhow!("openrouter api key is empty"));
955    }
956    let client = reqwest::Client::builder()
957        .connect_timeout(std::time::Duration::from_secs(
958            OPENROUTER_CONNECT_TIMEOUT_SECS,
959        ))
960        .timeout(std::time::Duration::from_secs(
961            OPENROUTER_REQUEST_TIMEOUT_SECS,
962        ))
963        .build()
964        .context("build reqwest client for openrouter_chat_stream")?;
965    let body = ChatRequest {
966        model,
967        messages: &messages,
968        stream: true,
969    };
970    let resp = client
971        .post(OPENROUTER_URL)
972        .bearer_auth(api_key)
973        .header("HTTP-Referer", HTTP_REFERER)
974        .header("X-Title", X_TITLE)
975        .json(&body)
976        .send()
977        .await
978        .context("POST openrouter chat completions (stream)")?;
979    let status = resp.status();
980    if !status.is_success() {
981        let text = resp.text().await.unwrap_or_default();
982        return Err(anyhow!("openrouter HTTP {status}: {text}"));
983    }
984
985    let mut buf = String::new();
986    let mut stream = resp.bytes_stream();
987    while let Some(chunk) = stream.next().await {
988        let bytes = chunk.context("read openrouter stream chunk")?;
989        let text = match std::str::from_utf8(&bytes) {
990            Ok(s) => s,
991            Err(_) => continue,
992        };
993        buf.push_str(text);
994
995        while let Some(idx) = buf.find('\n') {
996            let line: String = buf.drain(..=idx).collect();
997            let line = line.trim();
998            let Some(payload) = line.strip_prefix("data:").map(str::trim) else {
999                continue;
1000            };
1001            if payload.is_empty() || payload == "[DONE]" {
1002                continue;
1003            }
1004            let v: serde_json::Value = match serde_json::from_str(payload) {
1005                Ok(v) => v,
1006                Err(_) => continue,
1007            };
1008            if let Some(delta) = v
1009                .get("choices")
1010                .and_then(|c| c.get(0))
1011                .and_then(|c| c.get("delta"))
1012                .and_then(|d| d.get("content"))
1013                .and_then(|c| c.as_str())
1014                && !delta.is_empty()
1015                && tx.send(delta.to_string()).await.is_err()
1016            {
1017                // Receiver dropped — caller has lost interest.
1018                return Ok(());
1019            }
1020        }
1021    }
1022    Ok(())
1023}
1024
1025// ─── Misc helpers ─────────────────────────────────────────────────────────
1026
/// Report whether `path` names an existing directory.
///
/// Why: a small shim that reads more clearly at call sites than
/// `path.exists() && path.is_dir()`.
/// What: queries the path's metadata and returns `true` only when the lookup
/// succeeds and the metadata describes a directory; any metadata error
/// (including a missing path) yields `false`.
/// Test: `is_dir_recognises_directories`.
pub fn is_dir(path: &Path) -> bool {
    match path.metadata() {
        Ok(meta) => meta.is_dir(),
        Err(_) => false,
    }
}
1036
1037#[cfg(test)]
1038mod tests {
1039    use super::*;
1040    use std::sync::Mutex;
1041
1042    /// Serialises tests that mutate the `TRUSTY_DATA_DIR_OVERRIDE` env var so
1043    /// they don't race when `cargo test` runs them in parallel threads.
1044    static ENV_LOCK: Mutex<()> = Mutex::new(());
1045
1046    #[tokio::test]
1047    async fn auto_port_walks_forward() {
1048        // Bind to an OS-chosen port, then ask auto-port to start there.
1049        let occupied = TcpListener::bind("127.0.0.1:0").await.unwrap();
1050        let port = occupied.local_addr().unwrap().port();
1051        let addr: SocketAddr = format!("127.0.0.1:{port}").parse().unwrap();
1052        let next = bind_with_auto_port(addr, 8).await.unwrap();
1053        let got = next.local_addr().unwrap().port();
1054        assert_ne!(got, port, "expected walk-forward to a different port");
1055    }
1056
1057    #[tokio::test]
1058    async fn auto_port_zero_attempts_still_binds_free() {
1059        let addr: SocketAddr = "127.0.0.1:0".parse().unwrap();
1060        let l = bind_with_auto_port(addr, 0).await.unwrap();
1061        assert!(l.local_addr().unwrap().port() > 0);
1062    }
1063
1064    #[test]
1065    fn resolve_data_dir_creates_directory() {
1066        let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
1067        // Use the override env var so we deterministically control the base
1068        // directory cross-platform (macOS's dirs::data_dir ignores HOME).
1069        let tmp = tempfile_like_dir();
1070        // SAFETY: env mutation; tests in this module run serially via
1071        // #[test] threading isolation only when MUTEX-guarded — we accept
1072        // the residual risk since the override var is unique to these tests.
1073        unsafe {
1074            std::env::set_var(DATA_DIR_OVERRIDE_ENV, &tmp);
1075        }
1076        let dir = resolve_data_dir("trusty-test-xyz").unwrap();
1077        assert!(
1078            dir.exists(),
1079            "data dir should be created at {}",
1080            dir.display()
1081        );
1082        assert!(dir.is_dir());
1083        assert!(
1084            dir.starts_with(&tmp),
1085            "data dir {} should live under override {}",
1086            dir.display(),
1087            tmp.display()
1088        );
1089        unsafe {
1090            std::env::remove_var(DATA_DIR_OVERRIDE_ENV);
1091        }
1092    }
1093
1094    /// Why: guard introduced in #503 — an empty override must not produce a
1095    /// relative path that resolves under the daemon CWD.
1096    /// What: sets TRUSTY_DATA_DIR_OVERRIDE="" and asserts the result is an
1097    /// absolute path that does NOT start with "".
1098    /// Test: this function.
1099    #[test]
1100    fn resolve_data_dir_empty_override_uses_platform_dir() {
1101        let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
1102        // SAFETY: env mutation; serialised by ENV_LOCK.
1103        unsafe {
1104            std::env::set_var(DATA_DIR_OVERRIDE_ENV, "");
1105        }
1106        let result = resolve_data_dir("trusty-test-empty-override");
1107        unsafe {
1108            std::env::remove_var(DATA_DIR_OVERRIDE_ENV);
1109        }
1110        let dir = result.expect("empty override should fall back to platform dir");
1111        assert!(
1112            dir.is_absolute(),
1113            "resolved dir should be absolute, got {}",
1114            dir.display()
1115        );
1116        assert_ne!(
1117            dir,
1118            std::path::PathBuf::from("/"),
1119            "resolved dir must not be filesystem root"
1120        );
1121    }
1122
1123    /// Why: whitespace-only overrides are as dangerous as empty ones.
1124    /// What: sets TRUSTY_DATA_DIR_OVERRIDE="   " and asserts an absolute fallback.
1125    /// Test: this function.
1126    #[test]
1127    fn resolve_data_dir_whitespace_override_uses_platform_dir() {
1128        let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
1129        // SAFETY: env mutation; serialised by ENV_LOCK.
1130        unsafe {
1131            std::env::set_var(DATA_DIR_OVERRIDE_ENV, "   ");
1132        }
1133        let result = resolve_data_dir("trusty-test-ws-override");
1134        unsafe {
1135            std::env::remove_var(DATA_DIR_OVERRIDE_ENV);
1136        }
1137        let dir = result.expect("whitespace override should fall back to platform dir");
1138        assert!(dir.is_absolute(), "resolved dir should be absolute");
1139    }
1140
1141    /// Why: a relative override is non-deterministic (depends on daemon CWD).
1142    /// What: sets TRUSTY_DATA_DIR_OVERRIDE="relative/path" and asserts an error.
1143    /// Test: this function.
1144    #[test]
1145    fn resolve_data_dir_relative_override_errors() {
1146        let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
1147        // SAFETY: env mutation; serialised by ENV_LOCK.
1148        unsafe {
1149            std::env::set_var(DATA_DIR_OVERRIDE_ENV, "relative/path");
1150        }
1151        let result = resolve_data_dir("trusty-test-relative");
1152        unsafe {
1153            std::env::remove_var(DATA_DIR_OVERRIDE_ENV);
1154        }
1155        assert!(
1156            result.is_err(),
1157            "relative override should be rejected, but got Ok({})",
1158            result.unwrap().display()
1159        );
1160        let msg = result.unwrap_err().to_string();
1161        assert!(
1162            msg.contains("relative"),
1163            "error should mention 'relative', got: {msg}"
1164        );
1165    }
1166
1167    /// Why: override set to "/" would create palace dirs directly under the
1168    /// filesystem root, scattering data.
1169    /// What: sets TRUSTY_DATA_DIR_OVERRIDE="/" and asserts an error.
1170    /// Test: this function.
1171    #[test]
1172    fn resolve_data_dir_root_override_errors() {
1173        let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
1174        // SAFETY: env mutation; serialised by ENV_LOCK.
1175        unsafe {
1176            std::env::set_var(DATA_DIR_OVERRIDE_ENV, "/");
1177        }
1178        let result = resolve_data_dir("trusty-test-root");
1179        unsafe {
1180            std::env::remove_var(DATA_DIR_OVERRIDE_ENV);
1181        }
1182        assert!(
1183            result.is_err(),
1184            "root '/' override should be rejected, but got Ok({})",
1185            result.unwrap().display()
1186        );
1187        let msg = result.unwrap_err().to_string();
1188        assert!(
1189            msg.contains('/'),
1190            "error should mention the path, got: {msg}"
1191        );
1192    }
1193
1194    /// Why: confirms that a valid absolute override is still honoured, so the
1195    /// guard changes do not break the test-isolation use-case.
1196    /// What: sets TRUSTY_DATA_DIR_OVERRIDE to a tempdir and asserts the resolved
1197    /// path lives under it.
1198    /// Test: this function (complements resolve_data_dir_creates_directory).
1199    #[test]
1200    fn resolve_data_dir_valid_absolute_override_is_honoured() {
1201        let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
1202        let tmp = tempfile_like_dir();
1203        // SAFETY: env mutation; serialised by ENV_LOCK.
1204        unsafe {
1205            std::env::set_var(DATA_DIR_OVERRIDE_ENV, &tmp);
1206        }
1207        let result = resolve_data_dir("trusty-test-abs-override");
1208        unsafe {
1209            std::env::remove_var(DATA_DIR_OVERRIDE_ENV);
1210        }
1211        let dir = result.expect("valid absolute override should succeed");
1212        assert!(
1213            dir.starts_with(&tmp),
1214            "resolved dir {} should be under override {}",
1215            dir.display(),
1216            tmp.display()
1217        );
1218        assert!(dir.is_absolute(), "resolved dir must be absolute");
1219    }
1220
1221    /// Why: `sanitize_data_root` must catch a relative candidate (e.g. produced
1222    /// by a code path that forgot to prepend a base dir) and replace it with the
1223    /// safe temp fallback.
1224    /// What: passes `PathBuf::from("relative/path")` and asserts the returned
1225    /// path is absolute, lives under `temp_dir()`, and starts with "trusty-".
1226    /// Test: this function.
1227    #[test]
1228    fn sanitize_data_root_rejects_relative() {
1229        let result = sanitize_data_root(PathBuf::from("relative/path"), "myapp");
1230        assert!(result.is_absolute(), "fallback must be absolute");
1231        let name = result.file_name().unwrap().to_string_lossy();
1232        assert!(
1233            name.starts_with("trusty-"),
1234            "fallback dir name should start with trusty-, got {name}"
1235        );
1236    }
1237
1238    /// Why: a candidate equal to "/" must be replaced — palace dirs would be
1239    /// created directly at the filesystem root.
1240    /// What: passes `PathBuf::from("/")` and asserts a safe fallback is returned.
1241    /// Test: this function.
1242    #[test]
1243    fn sanitize_data_root_rejects_root() {
1244        let result = sanitize_data_root(PathBuf::from("/"), "myapp");
1245        assert!(result.is_absolute(), "fallback must be absolute");
1246        assert_ne!(result, PathBuf::from("/"), "must not still be /");
1247        let name = result.file_name().unwrap().to_string_lossy();
1248        assert!(
1249            name.starts_with("trusty-"),
1250            "fallback should start with trusty-"
1251        );
1252    }
1253
1254    /// Why: `HOME="/"` on Linux yields `/.trusty-memory`; `XDG_DATA_HOME="/"`
1255    /// yields `/trusty-memory`. These are direct children of `/` and are just
1256    /// as dangerous as `/` itself for data-scattering.
1257    /// What: passes `/bare-child` (parent == "/") and asserts a safe fallback.
1258    /// Test: this function.
1259    #[test]
1260    fn sanitize_data_root_rejects_bare_root_child() {
1261        let result = sanitize_data_root(PathBuf::from("/bare-child"), "myapp");
1262        assert!(result.is_absolute(), "fallback must be absolute");
1263        assert_ne!(
1264            result,
1265            PathBuf::from("/bare-child"),
1266            "bare root-child must be replaced"
1267        );
1268        let name = result.file_name().unwrap().to_string_lossy();
1269        assert!(
1270            name.starts_with("trusty-"),
1271            "fallback should start with trusty-"
1272        );
1273    }
1274
1275    /// Why: valid paths (two or more non-root ancestors) must pass through
1276    /// unchanged — we must not accidentally redirect legitimate data dirs.
1277    /// What: passes a tempdir-based path and asserts it is returned unmodified.
1278    /// Test: this function.
1279    #[test]
1280    fn sanitize_data_root_passes_valid_path() {
1281        let tmp = tempfile_like_dir();
1282        let candidate = tmp.join("trusty-myapp");
1283        let result = sanitize_data_root(candidate.clone(), "myapp");
1284        assert_eq!(
1285            result, candidate,
1286            "valid absolute path should be returned unchanged"
1287        );
1288    }
1289
1290    #[test]
1291    fn daemon_addr_round_trips() {
1292        let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
1293        let tmp = tempfile_like_dir();
1294        // SAFETY: env mutation; see note in resolve_data_dir_creates_directory.
1295        unsafe {
1296            std::env::set_var(DATA_DIR_OVERRIDE_ENV, &tmp);
1297        }
1298        let app = format!(
1299            "trusty-test-daemon-{}-{}",
1300            std::process::id(),
1301            std::time::SystemTime::now()
1302                .duration_since(std::time::UNIX_EPOCH)
1303                .map(|d| d.as_nanos())
1304                .unwrap_or(0)
1305        );
1306        write_daemon_addr(&app, "127.0.0.1:12345").unwrap();
1307        let got = read_daemon_addr(&app).unwrap();
1308        unsafe {
1309            std::env::remove_var(DATA_DIR_OVERRIDE_ENV);
1310        }
1311        assert_eq!(got.as_deref(), Some("127.0.0.1:12345"));
1312    }
1313
1314    #[test]
1315    fn read_daemon_addr_missing_returns_none() {
1316        let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
1317        let tmp = tempfile_like_dir();
1318        // SAFETY: env mutation; see note in resolve_data_dir_creates_directory.
1319        unsafe {
1320            std::env::set_var(DATA_DIR_OVERRIDE_ENV, &tmp);
1321        }
1322        let app = format!(
1323            "trusty-test-daemon-missing-{}-{}",
1324            std::process::id(),
1325            std::time::SystemTime::now()
1326                .duration_since(std::time::UNIX_EPOCH)
1327                .map(|d| d.as_nanos())
1328                .unwrap_or(0)
1329        );
1330        let got = read_daemon_addr(&app).unwrap();
1331        unsafe {
1332            std::env::remove_var(DATA_DIR_OVERRIDE_ENV);
1333        }
1334        assert!(got.is_none(), "expected None when file absent, got {got:?}");
1335    }
1336
1337    #[test]
1338    fn is_dir_recognises_directories() {
1339        let tmp = tempfile_like_dir();
1340        assert!(is_dir(&tmp));
1341        assert!(!is_dir(&tmp.join("nope")));
1342    }
1343
1344    #[test]
1345    fn chat_message_round_trips() {
1346        let m = ChatMessage {
1347            role: "user".into(),
1348            content: "hello".into(),
1349            tool_call_id: None,
1350            tool_calls: None,
1351        };
1352        let s = serde_json::to_string(&m).unwrap();
1353        let back: ChatMessage = serde_json::from_str(&s).unwrap();
1354        assert_eq!(back.role, "user");
1355        assert_eq!(back.content, "hello");
1356    }
1357
1358    #[tokio::test]
1359    #[allow(deprecated)]
1360    async fn openrouter_chat_rejects_empty_key() {
1361        let err = openrouter_chat("", "x", vec![]).await.unwrap_err();
1362        assert!(err.to_string().contains("api key"));
1363    }
1364
1365    #[tokio::test]
1366    async fn check_already_running_returns_none_when_file_missing() {
1367        // Why: a fresh machine (no prior daemon) must skip the probe entirely
1368        // and let the caller proceed with normal startup.
1369        let tmp = tempfile_like_dir();
1370        let missing = tmp.join("does-not-exist");
1371        let got = check_already_running(&missing, "/health").await;
1372        assert!(got.is_none());
1373    }
1374
1375    #[tokio::test]
1376    async fn check_already_running_returns_none_when_file_empty() {
1377        // Why: a half-written / truncated address file should be treated as
1378        // "no daemon" and the stale file cleared so the next start does not
1379        // see it again.
1380        let tmp = tempfile_like_dir();
1381        let path = tmp.join("http_addr");
1382        std::fs::write(&path, "   \n  ").unwrap();
1383        let got = check_already_running(&path, "/health").await;
1384        assert!(got.is_none());
1385        assert!(
1386            !path.exists(),
1387            "empty address file should be cleaned up by check_already_running"
1388        );
1389    }
1390
1391    #[tokio::test]
1392    async fn check_already_running_returns_none_when_address_dead() {
1393        // Why: a stale address (daemon previously crashed) must NOT block a
1394        // fresh start; the helper must probe, see no listener, clear the file,
1395        // and report "no daemon".
1396        let tmp = tempfile_like_dir();
1397        let path = tmp.join("http_addr");
1398        // Reserved unbound port — TCP connect will fail fast.
1399        std::fs::write(&path, "127.0.0.1:1\n").unwrap();
1400        let got = check_already_running(&path, "/health").await;
1401        assert!(got.is_none(), "dead address should map to None");
1402        assert!(
1403            !path.exists(),
1404            "stale address file should be cleaned up by check_already_running"
1405        );
1406    }
1407
1408    #[tokio::test]
1409    async fn check_already_running_returns_url_when_health_ok() {
1410        // Why: positive control — when a daemon really is listening and
1411        // returns 2xx on the health path, the helper must report its URL so
1412        // the caller can refuse to spawn a duplicate.
1413        // What: spin up a one-shot mini HTTP server on an ephemeral port that
1414        // answers `GET /health → 200`, write the address to the file, and
1415        // confirm the helper returns the expected URL.
1416        let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
1417        let local = listener.local_addr().unwrap();
1418        let server = tokio::spawn(async move {
1419            use tokio::io::{AsyncReadExt, AsyncWriteExt};
1420            if let Ok((mut sock, _)) = listener.accept().await {
1421                let mut buf = [0u8; 1024];
1422                let _ = sock.read(&mut buf).await;
1423                let _ = sock
1424                    .write_all(b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nok")
1425                    .await;
1426                let _ = sock.shutdown().await;
1427            }
1428        });
1429
1430        let tmp = tempfile_like_dir();
1431        let path = tmp.join("http_addr");
1432        std::fs::write(&path, format!("{local}\n")).unwrap();
1433
1434        let got = check_already_running(&path, "/health").await;
1435        assert_eq!(got.as_deref(), Some(format!("http://{local}").as_str()));
1436        assert!(
1437            path.exists(),
1438            "address file must be preserved when the daemon is healthy"
1439        );
1440        let _ = server.await;
1441    }
1442
1443    // Test-only helper: makes a unique scratch dir without pulling in tempfile
1444    // as a dev-dep (keeps the dependency surface minimal).
1445    fn tempfile_like_dir() -> PathBuf {
1446        let pid = std::process::id();
1447        let nanos = std::time::SystemTime::now()
1448            .duration_since(std::time::UNIX_EPOCH)
1449            .map(|d| d.as_nanos())
1450            .unwrap_or(0);
1451        let p = std::env::temp_dir().join(format!("trusty-common-test-{pid}-{nanos}"));
1452        std::fs::create_dir_all(&p).unwrap();
1453        p
1454    }
1455}