Skip to main content

trusty_common/
lib.rs

1//! Shared utility surface for trusty-* projects.
2//!
3//! Why: Port auto-detect, data-directory resolution, tracing init, NO_COLOR
4//! handling, and the OpenRouter chat-completions client appeared in both
5//! trusty-memory and trusty-search with subtle divergence. Centralising keeps
6//! them aligned and gives future trusty-* binaries a one-import surface.
7//!
8//! What: pure utility functions — no global state. Each subsystem is a free
9//! function or a small helper struct.
10//!
11//! Test: `cargo test -p trusty-common` covers port walking, data-dir creation,
12//! and the OpenRouter request shape (without hitting the network).
13//!
14//! # Test isolation: `TRUSTY_DATA_DIR_OVERRIDE`
15//!
16//! macOS's [`dirs::data_dir()`] resolves the application-support directory via
17//! `NSFileManager`, a native Cocoa API that completely ignores the `HOME` and
18//! `XDG_DATA_HOME` environment variables. This makes it impossible to redirect
//! data-directory access in tests using ordinary env-var tricks, because the
//! lookup never consults the process environment.
21//!
22//! To work around this, [`resolve_data_dir`] checks the
23//! [`DATA_DIR_OVERRIDE_ENV`] (`TRUSTY_DATA_DIR_OVERRIDE`) environment variable
24//! before consulting `dirs::data_dir()`. When set, the variable's value is used
25//! as the base directory verbatim, and `dirs::data_dir()` is never called.
26//!
27//! **This escape hatch is intended for testing only.** Do not set it in
28//! production deployments; rely on the OS-standard data directory instead.
29
30use std::net::SocketAddr;
31use std::path::{Path, PathBuf};
32
33pub mod chat;
34pub mod claude_config;
35pub mod project_discovery;
36
37/// Bounded in-memory ring buffer of recent tracing log lines.
38///
39/// Why: trusty-* daemons expose a `/logs/tail` endpoint so operators can read
40/// recent logs over HTTP without file I/O or a daemon restart. The buffer and
41/// its `tracing_subscriber::Layer` live here so every daemon shares one impl.
42/// What: `LogBuffer` (thread-safe capped `VecDeque<String>`) plus
43/// `LogBufferLayer` (the tracing layer that feeds it).
44/// Test: `cargo test -p trusty-common log_buffer` covers capacity eviction,
45/// tail semantics, and layer capture.
46pub mod log_buffer;
47
48/// Process RSS / CPU sampling and data-directory sizing for daemon health.
49///
50/// Why: every trusty-* daemon's `/health` endpoint reports its own resident
51/// memory, CPU usage, and on-disk footprint; the sampling logic is identical
52/// across them so it lives here once.
53/// What: `SysMetrics` (per-process RSS + CPU sampler) and `dir_size_bytes`
54/// (recursive directory byte count).
55/// Test: `cargo test -p trusty-common sys_metrics`.
56pub mod sys_metrics;
57
58/// macOS LaunchAgent generation and lifecycle management. macOS-only —
59/// the module compiles to nothing on every other platform.
60#[cfg(target_os = "macos")]
61pub mod launchd;
62
63#[cfg(feature = "axum-server")]
64pub mod server;
65
66/// Shared JSON-RPC 2.0 / MCP primitives (formerly the `trusty-mcp-core` crate).
67///
68/// Why: Centralises `Request`/`Response`/`JsonRpcError` envelopes, the
69/// `initialize` response builder, an async stdio dispatch loop, and the
70/// OpenRPC `rpc.discover` helpers so every MCP server in the workspace
71/// imports the same types.
72/// What: Gated behind the `mcp` feature; pulls in no extra dependencies
73/// beyond `serde` / `tokio`, both of which are already required.
74/// Test: `cargo test -p trusty-common --features mcp` runs the module's
75/// own unit tests (envelope round-trips, stdio loop dispatch, OpenRPC
76/// builder shape).
77#[cfg(feature = "mcp")]
78pub mod mcp;
79
80/// General-purpose JSON-RPC client + transports (formerly the library half
81/// of the `trusty-rpc` crate).
82///
83/// Why: Both `trpc` (the CLI) and any future library consumer want one
84/// place that owns the JSON-RPC envelope construction, stdio-subprocess
85/// transport, HTTP transport, and pretty-printers.
86/// What: Gated behind the `rpc` feature; requires `uuid` for request id
87/// generation. The HTTP transport reuses the workspace `reqwest`.
88/// Test: `cargo test -p trusty-common --features rpc` runs the module's
89/// own unit tests (envelope extraction, pretty-print smoke tests).
90#[cfg(feature = "rpc")]
91pub mod rpc;
92
93/// Shared text-embedding abstraction (formerly the `trusty-embedder` crate).
94///
95/// Why: trusty-memory and trusty-search both ship near-identical `Embedder`
96/// traits and `FastEmbedder` implementations; centralising the surface here
97/// keeps them aligned and lets future consumers pick up embedding for free
98/// without a separate published crate.
99/// What: Gated behind the `embedder` feature. Exposes the `Embedder` trait,
100/// `FastEmbedder` (fastembed-rs, all-MiniLM-L6-v2, 384-d) with LRU caching
101/// and ORT warmup, and (under `embedder-test-support`) the `MockEmbedder`
102/// test double.
103/// Test: `cargo test -p trusty-common --features embedder,embedder-test-support`
104/// covers the mock embedder and ONNX-backed `#[ignore]`d integration tests.
105#[cfg(feature = "embedder")]
106pub mod embedder;
107
108/// Unified RPC client surface for the `trusty-embedderd` standalone process.
109///
110/// Why: absorbs both the former `trusty-embedder-client` HTTP crate (PR #163)
111/// and the former `embed_client` UDS module (PR #157) into a single unified
112/// module. Reduces workspace crate count and provides one trait (`EmbedderClient`)
113/// with three concrete implementations (InProcess, HTTP remote, UDS remote) so
114/// call sites are identical regardless of transport. The `embed-client` feature
115/// and `embed_client` module are retired by issue #164; use `embedder-client`
116/// and `trusty_common::embedder_client::UdsEmbedderClient` instead.
117/// What: Gated behind the `embedder-client` feature. Exposes the
118/// `EmbedderClient` trait, `InProcessEmbedderClient`, `RemoteEmbedderClient`
119/// (HTTP), `UdsEmbedderClient` (UDS), `EmbedRequest` / `EmbedResponse` wire
120/// types, and `EmbedderError`. The UDS impl uses `tokio::net::UnixStream`
121/// with newline-framed JSON-RPC 2.0 — no additional dependencies.
122/// Test: `cargo test -p trusty-common --features embedder-client` covers
123/// error-display, JSON round-trip, URL assembly, UDS wire types, and empty-
124/// batch short-circuits. ONNX-backed tests are in
125/// `trusty-embedderd/tests/bit_identical.rs` (`#[ignore]`).
126#[cfg(feature = "embedder-client")]
127pub mod embedder_client;
128
129/// Zero-dependency BM25 lexical index + code-aware tokenizer (issue #156).
130///
131/// Why: trusty-memory, trusty-search, and the per-palace
132/// `trusty-bm25-daemon` subprocess all want one shared BM25 implementation
133/// so the tokenizer's camelCase / PascalCase / alpha↔digit splits stay
134/// consistent across the workspace. Originally ported from open-mpm; now
135/// the single source of truth lives here.
136/// What: Gated behind the `bm25` feature. Adds no new dependencies — pure
137/// `std` + `tracing` (already required).
138/// Test: `cargo test -p trusty-common --features bm25`.
139#[cfg(feature = "bm25")]
140pub mod bm25;
141
142/// Reusable schema-migration kernel (issue #179).
143///
144/// Why: trusty-search, trusty-memory, and other long-lived stores have grown
145/// ad-hoc schema-migration loops that drift apart. Centralising the
146/// `SchemaVersion` newtype, the `Migration<S>` trait, and a `MigrationRunner`
147/// that applies pending steps in order (writing a stamp after each) collapses
148/// those into one shared kernel. The `file_stamp` helper covers the common
149/// "JSON sidecar in the store's data dir" stamp format; redb-stamp users get
150/// a documented recipe instead of a heavyweight dep.
151/// What: gated behind the `migrations` feature flag. Adds no new
152/// dependencies — pure `serde` + `serde_json` + `anyhow` + `tracing` which
153/// the crate already requires.
154/// Test: `cargo test -p trusty-common --features migrations` covers the
155/// runner ordering, crash resumption, write-stamp failure propagation, and
156/// the file-stamp round-trip / atomic-write behaviour.
157#[cfg(feature = "migrations")]
158pub mod migrations;
159
160/// UDS JSON-RPC client for the per-palace `trusty-bm25-daemon` subprocess
161/// (issue #156).
162///
163/// Why: trusty-memory needs a lexical-search lane without holding an
164/// in-process BM25 index. `Bm25Client` delegates to the per-palace daemon
165/// over `$TMPDIR/trusty-bm25-<palace>.sock`, matching the design of
166/// `EmbedClient` and `trusty-embed-daemon` (PR #157).
167/// What: Gated behind the `bm25-client` feature. Pure user of existing
168/// `tokio` / `serde_json` / `anyhow` workspace deps — adds no new
169/// dependencies.
170/// Test: `cargo test -p trusty-common --features bm25-client` covers
171/// request shape and path defaults; end-to-end coverage lives in
172/// `trusty-bm25-daemon/tests/`.
173#[cfg(feature = "bm25-client")]
174pub mod bm25_client;
175
176/// Symbol-graph engine (formerly the `trusty-symgraph` crate).
177///
178/// Why: All trusty-* tools that touch source code (open-mpm, trusty-search,
179/// trusty-analyze) want the same `EntityType` / `RawEntity` / `EdgeKind`
180/// data shapes and (for orchestrators) the same tree-sitter pipeline. Living
181/// here lets the workspace ship one tree-sitter `links =` slot instead of
182/// juggling two crates that both claim it.
183/// What: Gated behind two features. `symgraph` exposes only the contracts
184/// surface (`EntityType`, `RawEntity`, `EdgeKind`, `fact_hash_str`, tables)
185/// — no tree-sitter, no `links` conflict. `symgraph-parser` additionally
186/// pulls in tree-sitter and the full parse → registry → emit stack.
187/// `symgraph-server` enables the HTTP server frontend.
188/// Test: `cargo test -p trusty-common --features symgraph` exercises the
189/// contracts surface; `cargo test -p trusty-symgraph` covers the parser
190/// path through the thin re-export shim.
191#[cfg(feature = "symgraph")]
192pub mod symgraph;
193
194/// Memory Palace storage engine (formerly the `trusty-memory-core` crate).
195///
196/// Why: Centralises the Memory Palace data model (`Palace` / `Wing` /
197/// `Room` / `Drawer`), storage backends (usearch vector index + SQLite
198/// knowledge graph + chat-session log + payload store), retrieval handle,
199/// and the dream / decay / analytics / git-history surfaces so every
200/// trusty-* binary that talks to a palace reuses the same types. Absorbed
201/// into `trusty-common` (issue #5 phase 2d) so we ship one fewer published
202/// crate.
203/// What: Gated behind the `memory-core` feature because it pulls in heavy
204/// storage deps (`usearch`, `rusqlite`, `r2d2`, `git2`, `kuzu`). Enables
205/// the embedder surface automatically (memory-core → embedder).
206/// Test: `cargo test -p trusty-common --features memory-core` exercises
207/// the full surface.
208#[cfg(feature = "memory-core")]
209pub mod memory_core;
210
211/// Unified ticketing MCP server (formerly the `trusty-tickets` crate).
212///
213/// Why: Claude Code and the rest of the trusty-* suite need a single MCP
214/// surface that can talk to GitHub Issues, JIRA, and Linear without the
215/// caller needing to know which backend is configured. Absorbing into
216/// `trusty-common` reduces the workspace crate count and co-locates the
217/// HTTP client surface with the other protocol helpers.
218/// What: Gated behind the `tickets` feature. Exposes `tickets::api::*`
219/// (config, models, Backend trait, three concrete backends), `tickets::server`
220/// (MCP dispatch loop + `run_stdio`), and `tickets::tools` (the tool-list
221/// schema). Requires the `mcp` feature for the stdio loop.
222/// Test: `cargo test -p trusty-common --features tickets` runs the module's
223/// own unit tests (dispatch, tool-list counts, config parsing, serde
224/// round-trips). Live backend tests require env-var credentials.
225#[cfg(feature = "tickets")]
226pub mod tickets;
227
228/// Declarative CLI help system with "did you mean?" suggestions (issue #216).
229///
230/// Why: every standalone trusty-* binary used to render its `--help` and
231/// unknown-subcommand error output independently, so the formats drifted
232/// apart over time. Centralising the help model into one YAML schema, one
233/// canonical renderer, and one Jaro-Winkler suggester keeps the six binaries
234/// (search, memory, analyze, mpm-cli, tga, open-mpm) speaking with a single
235/// user-facing voice.
236/// What: gated behind the `cli-help` feature. Pulls in `serde_yaml`, `strsim`,
237/// and `indexmap`. Exposes `HelpConfig` / `CommandDef` / `FlagDef` / `Example`
238/// + `load_help` / `render_help` / `suggest`.
239/// Test: `cargo test -p trusty-common --features cli-help`.
240#[cfg(feature = "cli-help")]
241pub mod help;
242
243/// Unified monitor TUI for the trusty-search and trusty-memory daemons
244/// (formerly the `trusty-monitor-tui` crate).
245///
246/// Why: operators run both daemons and want one terminal surface that shows
247/// the health of both at a glance. Living here behind the `monitor-tui`
248/// feature flag matches the workspace's "one fewer published crate" direction
249/// (issue #31 companion) and keeps the dashboard logic unit-testable.
250/// What: gated behind the `monitor-tui` feature, which pulls in `ratatui` and
251/// `crossterm`. Exposes `monitor::run` (the entry point the `trusty-monitor`
252/// binary calls) plus the pure `dashboard` / `search_client` / `memory_client`
253/// submodules.
254/// Test: `cargo test -p trusty-common --features monitor-tui` covers the
255/// rendering, layout, and HTTP-client pieces.
256#[cfg(feature = "monitor-tui")]
257pub mod monitor;
258
259pub use chat::{
260    ChatEvent, ChatProvider, LocalModelConfig, OllamaProvider, OpenRouterProvider, ToolCall,
261    ToolDef, auto_detect_local_provider,
262};
263
264use anyhow::{Context, Result, anyhow};
265use serde::{Deserialize, Serialize};
266use tokio::net::TcpListener;
267
268// ─── Port binding ─────────────────────────────────────────────────────────
269
270/// Bind to `addr`; if the port is in use, walk forward up to `max_attempts`
271/// ports and return the first listener that binds.
272///
273/// Why: Running multiple instances of a trusty-* daemon (or restarting before
274/// the kernel releases the prior socket) shouldn't produce a noisy failure —
275/// auto-incrementing gives a friendlier developer experience while still
276/// honouring the user's preferred starting port.
277/// What: returns the first successful `tokio::net::TcpListener`. Callers can
278/// inspect `local_addr()` to discover where it landed and report it however
279/// they prefer — this function does not perform any I/O on stdout/stderr.
280/// `max_attempts == 0` means "try `addr` exactly once".
281/// Test: `auto_port_walks_forward` binds a port, then calls this with the
282/// occupied port and confirms a different free port is returned.
283pub async fn bind_with_auto_port(addr: SocketAddr, max_attempts: u16) -> Result<TcpListener> {
284    use std::io::ErrorKind;
285    let mut current = addr;
286    for attempt in 0..=max_attempts {
287        match TcpListener::bind(current).await {
288            Ok(l) => return Ok(l),
289            Err(e) if e.kind() == ErrorKind::AddrInUse && attempt < max_attempts => {
290                let next_port = current.port().saturating_add(1);
291                if next_port == 0 {
292                    anyhow::bail!("ran out of ports while searching for free slot");
293                }
294                tracing::warn!("port {} in use, trying {}", current.port(), next_port);
295                current.set_port(next_port);
296            }
297            Err(e) => return Err(e.into()),
298        }
299    }
300    anyhow::bail!("could not find free port after {max_attempts} attempts")
301}
302
303// ─── Data directory ───────────────────────────────────────────────────────
304
/// Environment variable name for the data-directory test escape hatch.
///
/// Why: macOS's `dirs::data_dir()` delegates to `NSFileManager`, a native Cocoa
/// API that ignores `HOME` and `XDG_DATA_HOME`. Setting `HOME` in a test process
/// does **not** redirect `dirs::data_dir()` on macOS, making path isolation
/// impossible without a separate bypass. This constant names that bypass.
/// NOTE(review): the `NSFileManager` statement is not verifiable from this
/// file — confirm it against the pinned `dirs` version's macOS backend.
///
/// What: When `TRUSTY_DATA_DIR_OVERRIDE` is set in the environment,
/// [`resolve_data_dir`] uses its value as the base directory and skips the
/// `dirs::data_dir()` call entirely. The final path is
/// `${TRUSTY_DATA_DIR_OVERRIDE}/<app_name>`, identical in structure to the
/// normal OS-standard path.
///
/// **Intended for tests only.** Do not set this variable in production; it
/// bypasses the OS-standard application-data directory.
///
/// Test: All `resolve_data_dir` tests in this module set this var to a
/// temporary directory so they run identically on macOS, Linux, and Windows.
pub const DATA_DIR_OVERRIDE_ENV: &str = "TRUSTY_DATA_DIR_OVERRIDE";
324
325/// Resolve `<data_dir>/<app_name>`, creating it if it doesn't exist.
326///
327/// Why: All trusty-* tools want a per-machine, per-app directory under the
328/// OS-standard data dir (`~/Library/Application Support/`, `~/.local/share/`,
329/// `%APPDATA%/`). If `dirs::data_dir()` is unavailable (rare — locked-down
330/// containers), falls back to `~/.<app_name>` so the tool still works.
331///
332/// The [`DATA_DIR_OVERRIDE_ENV`] (`TRUSTY_DATA_DIR_OVERRIDE`) environment
333/// variable provides a test escape hatch: when set, `dirs::data_dir()` is
334/// **never called** and the variable's value is used as the base directory
335/// instead. This is necessary because macOS's `dirs::data_dir()` calls
336/// `NSFileManager` — a native Cocoa API that resolves the application-support
337/// directory through the system rather than through the process environment —
338/// so setting `HOME` or `XDG_DATA_HOME` in a test process does not redirect
339/// it. `TRUSTY_DATA_DIR_OVERRIDE` is the only reliable cross-platform way to
340/// isolate test data paths. **It is intended for tests only; do not set it in
341/// production.**
342///
343/// What: returns the absolute path `${base}/<app_name>` (created if absent).
344/// Resolution order:
345/// 1. `$TRUSTY_DATA_DIR_OVERRIDE/<app_name>` — when the env var is set.
346/// 2. `$(dirs::data_dir())/<app_name>` — normal OS-standard path.
347/// 3. `~/.<app_name>` — fallback when `dirs::data_dir()` returns `None`.
348///
349/// Test: `resolve_data_dir_creates_directory` pins a temporary directory via
350/// `TRUSTY_DATA_DIR_OVERRIDE` and asserts that the returned path is created
351/// under it, exercising both the override path and directory-creation logic.
352pub fn resolve_data_dir(app_name: &str) -> Result<PathBuf> {
353    let base = if let Ok(override_dir) = std::env::var(DATA_DIR_OVERRIDE_ENV) {
354        PathBuf::from(override_dir)
355    } else {
356        dirs::data_dir()
357            .or_else(|| dirs::home_dir().map(|h| h.join(format!(".{app_name}"))))
358            .context("could not resolve data directory or home directory")?
359    };
360    let dir = if base.ends_with(format!(".{app_name}")) {
361        base
362    } else {
363        base.join(app_name)
364    };
365    std::fs::create_dir_all(&dir)
366        .with_context(|| format!("create data directory {}", dir.display()))?;
367    Ok(dir)
368}
369
370// ─── Daemon address file ──────────────────────────────────────────────────
371
/// Filename used inside each app's data directory to record the daemon's
/// bound HTTP address. Kept as a module-level constant so the writer
/// (`write_daemon_addr`) and the reader (`read_daemon_addr`) can't drift.
const DAEMON_ADDR_FILENAME: &str = "http_addr";
376
377/// Write the daemon's bound HTTP address to the app's data directory.
378///
379/// Why: Both trusty-search and trusty-memory persist their bound `host:port`
380/// to disk so MCP clients (and follow-up CLI invocations) can discover where
381/// the daemon ended up after auto-port-walking. Centralising the path layout
382/// keeps the two projects in sync and prevents a third trusty-* daemon from
383/// inventing yet another location.
384/// What: writes `addr` verbatim (no trailing newline) to
385/// `{resolve_data_dir(app_name)}/http_addr`, creating the directory if it
386/// doesn't yet exist. Atomic-overwrite semantics aren't required — the file
387/// is rewritten on every daemon start.
388/// Test: `daemon_addr_round_trips` writes then reads under a stubbed HOME and
389/// confirms equality.
390pub fn write_daemon_addr(app_name: &str, addr: &str) -> Result<()> {
391    let dir = resolve_data_dir(app_name)?;
392    let path = dir.join(DAEMON_ADDR_FILENAME);
393    std::fs::write(&path, addr).with_context(|| format!("write daemon addr to {}", path.display()))
394}
395
396/// Read the daemon's HTTP address from the app's data directory.
397///
398/// Why: CLI commands and MCP clients need to discover the running daemon's
399/// bound port. Returning `Option` lets callers distinguish "daemon never
400/// started" (file absent) from "filesystem error" (permission denied, etc.)
401/// without resorting to string matching on error messages.
402/// What: reads `{resolve_data_dir(app_name)}/http_addr`, trims surrounding
403/// whitespace, and returns `Some(addr)`. Returns `Ok(None)` iff the file
404/// does not exist; any other I/O error propagates as `Err`.
405/// Test: `daemon_addr_round_trips` and `read_daemon_addr_missing_returns_none`.
406pub fn read_daemon_addr(app_name: &str) -> Result<Option<String>> {
407    let dir = resolve_data_dir(app_name)?;
408    let path = dir.join(DAEMON_ADDR_FILENAME);
409    match std::fs::read_to_string(&path) {
410        Ok(s) => Ok(Some(s.trim().to_string())),
411        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None),
412        Err(e) => Err(anyhow::Error::new(e))
413            .with_context(|| format!("read daemon addr from {}", path.display())),
414    }
415}
416
417// ─── Already-running guard ────────────────────────────────────────────────
418
419/// Issue a short-timeout `GET {base_url}{health_path}` and report whether it
420/// returns a 2xx response.
421///
422/// Why: every trusty-* daemon's "is one already running?" check follows the
423/// same shape — probe the recorded address for `/health` with a tight timeout
424/// so a dead daemon does not block the start command for the discovery
425/// timeout. Lifting the probe into one helper keeps the request/timeout
426/// configuration identical across `check_already_running` (file-based) and the
427/// trusty-mpm lock-file path (where the URL is derived from a TOML file).
428/// What: builds a `reqwest::Client` with a 1 s request timeout, issues the GET,
429/// returns `true` only when the response is HTTP 2xx. Any client-builder error
430/// or transport failure returns `false`.
431/// Test: covered indirectly via `check_already_running_*` and the three daemon
432/// integration paths.
433pub async fn probe_health(base_url: &str, health_path: &str) -> bool {
434    let probe = format!("{base_url}{health_path}");
435    let client = match reqwest::Client::builder()
436        .timeout(std::time::Duration::from_secs(1))
437        .build()
438    {
439        Ok(c) => c,
440        Err(_) => return false,
441    };
442    matches!(client.get(&probe).send().await, Ok(resp) if resp.status().is_success())
443}
444
445/// Probe whether an existing daemon recorded at `addr_file` is healthy and,
446/// if so, return its base URL so the caller can refuse to start a duplicate.
447///
448/// Why: every trusty-* daemon (search, memory, mpm) historically port-walked on
449/// boot. Invoking the `start` / `serve` command a second time silently spawned
450/// a second instance on the next free port — splitting traffic between two
451/// stores, doubling RSS, and confusing every client that resolves the address
452/// from disk. The CLI must read the recorded address, ask the live process for
453/// `/health`, and if both succeed report "already running" and exit 0 rather
454/// than racing a duplicate process against the port walker. A shared helper
455/// keeps the three daemons honest — drift here is the bug we are fixing.
456/// What: returns `Some("http://<addr>")` only when (a) `addr_file` exists and
457/// is readable, (b) its trimmed contents parse as a non-empty `host:port`, and
458/// (c) an HTTP `GET http://<addr><health_path>` returns a 2xx within ~1.5 s
459/// (1 s request timeout plus tokio scheduling slack). Returns `None` on every
460/// other outcome — missing file, unreadable contents, dead address, non-2xx
461/// response — so the caller treats that as "no live daemon, proceed".
462/// Side-effect (stale-file cleanup): when the file exists but the health probe
463/// fails (or the file is empty / malformed), the function best-effort deletes
464/// it via `std::fs::remove_file` so the next caller does not chase the same
465/// dead address. A delete failure is intentionally ignored.
466/// Test: `check_already_running_returns_none_when_file_missing`,
467/// `check_already_running_returns_none_when_file_empty`,
468/// `check_already_running_returns_none_when_address_dead`,
469/// `check_already_running_returns_url_when_health_ok`.
470pub async fn check_already_running(addr_file: &Path, health_path: &str) -> Option<String> {
471    let raw = match std::fs::read_to_string(addr_file) {
472        Ok(s) => s,
473        Err(_) => return None,
474    };
475    let addr = raw.trim();
476    if addr.is_empty() {
477        // Empty / whitespace-only file is treated as stale — best-effort delete.
478        let _ = std::fs::remove_file(addr_file);
479        return None;
480    }
481    let url = format!("http://{addr}");
482    if probe_health(&url, health_path).await {
483        Some(url)
484    } else {
485        // Stale file pointing at a dead address. Clear it so the next start
486        // attempt is not blocked by a probe against the dead URL.
487        let _ = std::fs::remove_file(addr_file);
488        None
489    }
490}
491
492// ─── CLI initialisation ───────────────────────────────────────────────────
493
494/// Initialise the global tracing subscriber.
495///
496/// Why: Every trusty-* binary wants the same verbosity ladder and the same
497/// `RUST_LOG` override semantics. Defining it once removes the boilerplate
498/// from every `main.rs`.
499/// What: `verbose_count` maps `0 → warn`, `1 → info`, `2 → debug`, `3+ →
500/// trace`. If `RUST_LOG` is set in the environment it wins. Logs go to
501/// stderr so stdout stays clean for MCP JSON-RPC.
502/// Test: side-effecting (global subscriber) — covered by integration with
503/// `cargo run -- -v status` in downstream crates.
504pub fn init_tracing(verbose_count: u8) {
505    let default_filter = match verbose_count {
506        0 => "warn",
507        1 => "info",
508        2 => "debug",
509        _ => "trace",
510    };
511    let filter = tracing_subscriber::EnvFilter::try_from_default_env()
512        .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_filter));
513    // try_init so callers that pre-install a subscriber don't panic.
514    let _ = tracing_subscriber::fmt()
515        .with_env_filter(filter)
516        .with_writer(std::io::stderr)
517        .with_target(false)
518        .try_init();
519}
520
521/// Initialise the global tracing subscriber and capture events into a
522/// [`log_buffer::LogBuffer`] so the daemon can serve recent logs over HTTP.
523///
524/// Why: daemons expose `GET /logs/tail`, which needs an in-memory ring of
525/// recent log lines. Routing capture through the subscriber means every
526/// existing `tracing::info!` / `warn!` call site is mirrored automatically —
527/// no second logging API to keep in sync. The stderr `fmt` layer is retained
528/// so operators still see live logs in the terminal / launchd log file.
529/// What: builds a `tracing_subscriber::registry` with two layers — the
530/// standard stderr `fmt` layer (same verbosity ladder + `RUST_LOG` override
531/// as [`init_tracing`]) and a [`log_buffer::LogBufferLayer`] feeding the
532/// returned [`log_buffer::LogBuffer`]. Uses `try_init`, so a process that has
533/// already installed a subscriber keeps it; the returned buffer is still
534/// valid (just empty) in that case.
535/// Test: `cargo test -p trusty-common log_buffer` covers the layer; the
536/// daemon `/logs/tail` integration tests cover the wired path end-to-end.
537#[must_use]
538pub fn init_tracing_with_buffer(verbose_count: u8, capacity: usize) -> log_buffer::LogBuffer {
539    use tracing_subscriber::Layer as _;
540    use tracing_subscriber::layer::SubscriberExt;
541    use tracing_subscriber::util::SubscriberInitExt;
542
543    let default_filter = match verbose_count {
544        0 => "warn",
545        1 => "info",
546        2 => "debug",
547        _ => "trace",
548    };
549    // Stderr filter follows the same verbosity ladder + `RUST_LOG` override as
550    // `init_tracing` so terminal output stays compact at the operator's chosen
551    // level.
552    let stderr_filter = tracing_subscriber::EnvFilter::try_from_default_env()
553        .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_filter));
554
555    // The log-buffer layer must capture activity even when the stderr filter
556    // is set to `warn` (the default for `trusty-search start` without `-v`).
557    // Operators reading `/logs/tail` expect to see info-level lifecycle events
558    // (file-watcher reindexes, startup scans). Without a separate filter the
559    // global stderr filter would suppress them before they reach the buffer.
560    // `RUST_LOG_BUFFER` lets ops widen or narrow the buffer independently of
561    // stderr; the default of `info` matches the activity feed's intent.
562    let buffer_filter = tracing_subscriber::EnvFilter::try_from_env("RUST_LOG_BUFFER")
563        .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info"));
564
565    let buffer = log_buffer::LogBuffer::new(capacity);
566    let fmt_layer = tracing_subscriber::fmt::layer()
567        .with_writer(std::io::stderr)
568        .with_target(false)
569        .with_filter(stderr_filter);
570    let buf_layer = log_buffer::LogBufferLayer::new(buffer.clone()).with_filter(buffer_filter);
571    // try_init so callers that pre-install a subscriber don't panic — the
572    // returned buffer simply stays empty in that (rare) case.
573    let _ = tracing_subscriber::registry()
574        .with(fmt_layer)
575        .with(buf_layer)
576        .try_init();
577    buffer
578}
579
580/// Disable coloured terminal output when requested or when stdout is not a TTY.
581///
582/// Why: Pipe-friendly output is mandatory for scripting (`trusty-search list
583/// | jq …`). `NO_COLOR` / `TERM=dumb` are the canonical signals; passing
584/// `--no-color` should override too.
585/// What: calls `colored::control::set_override(false)` when the caller asks
586/// for it or when the standard heuristics indicate no colour.
587/// Test: side-effecting global; trivially covered by manual `NO_COLOR=1 cargo
588/// run -- list`.
589pub fn maybe_disable_color(no_color: bool) {
590    let env_says_no =
591        std::env::var("NO_COLOR").is_ok() || std::env::var("TERM").as_deref() == Ok("dumb");
592    if no_color || env_says_no {
593        colored::control::set_override(false);
594    }
595}
596
597// ─── OpenRouter ───────────────────────────────────────────────────────────
598
/// OpenRouter chat-completions endpoint.
const OPENROUTER_URL: &str = "https://openrouter.ai/api/v1/chat/completions";
/// Project URL identifying this client — presumably sent as the
/// `HTTP-Referer` request header; the call site is outside this view, confirm.
const HTTP_REFERER: &str = "https://github.com/bobmatnyc/trusty-common";
/// Application title — presumably sent as the `X-Title` request header;
/// confirm against the call site.
const X_TITLE: &str = "trusty-common";
/// Connect timeout, in seconds, for OpenRouter requests.
const OPENROUTER_CONNECT_TIMEOUT_SECS: u64 = 10;
/// Whole-request timeout, in seconds, for OpenRouter requests.
const OPENROUTER_REQUEST_TIMEOUT_SECS: u64 = 120; // chat completions can take 60–90s
604
605/// OpenAI-compatible chat message.
606///
607/// Why: Both trusty-memory's `chat` subcommand and trusty-search's `/chat`
608/// endpoint speak the OpenRouter format. Sharing the struct keeps them in
609/// step (and lets callers compose chat histories without re-defining types).
610/// Tool-use additions (`tool_call_id`, `tool_calls`) follow the OpenAI
611/// function-calling shape: assistant messages set `tool_calls` when the model
612/// requests tool invocations; subsequent `role: "tool"` messages echo the
613/// matching `tool_call_id` with the tool's result in `content`.
614/// What: `role` is one of `"system" | "user" | "assistant" | "tool"`.
615/// `content` is the message text. `tool_call_id` is the id of the tool call
616/// this message is replying to (only set when `role == "tool"`). `tool_calls`
617/// is the raw OpenAI `tool_calls` array on an assistant message that asked
618/// to invoke tools — kept as `serde_json::Value` so we don't drop any fields
619/// the upstream may add.
620/// Test: serde round-trip in `chat_message_round_trips`.
621#[derive(Debug, Clone, Serialize, Deserialize)]
622pub struct ChatMessage {
623    pub role: String,
624    pub content: String,
625    #[serde(skip_serializing_if = "Option::is_none", default)]
626    pub tool_call_id: Option<String>,
627    #[serde(skip_serializing_if = "Option::is_none", default)]
628    pub tool_calls: Option<Vec<serde_json::Value>>,
629}
630
631#[derive(Debug, Serialize)]
632struct ChatRequest<'a> {
633    model: &'a str,
634    messages: &'a [ChatMessage],
635    stream: bool,
636}
637
638#[derive(Debug, Deserialize)]
639struct ChatResponse {
640    choices: Vec<Choice>,
641}
642
643#[derive(Debug, Deserialize)]
644struct Choice {
645    message: ResponseMessage,
646}
647
648#[derive(Debug, Deserialize)]
649struct ResponseMessage {
650    #[serde(default)]
651    content: String,
652}
653
654/// Send a chat completion request to OpenRouter and return the assistant's
655/// message content.
656///
657/// Why: A one-shot, non-streaming chat call is the common-case helper — used
658/// by trusty-memory's `chat` CLI and trusty-search's `/chat` endpoint.
659/// What: POSTs `{model, messages, stream: false}` to OpenRouter with bearer
660/// auth, decodes the response, and returns `choices[0].message.content`.
661/// Errors propagate as anyhow with HTTP status context.
662/// Test: error paths covered by `openrouter_propagates_http_errors` (uses a
663/// blackhole base URL — no real call).
664#[deprecated(since = "0.3.1", note = "Use OpenRouterProvider::chat_stream instead")]
665pub async fn openrouter_chat(
666    api_key: &str,
667    model: &str,
668    messages: Vec<ChatMessage>,
669) -> Result<String> {
670    if api_key.is_empty() {
671        return Err(anyhow!("openrouter api key is empty"));
672    }
673    let client = reqwest::Client::builder()
674        .connect_timeout(std::time::Duration::from_secs(
675            OPENROUTER_CONNECT_TIMEOUT_SECS,
676        ))
677        .timeout(std::time::Duration::from_secs(
678            OPENROUTER_REQUEST_TIMEOUT_SECS,
679        ))
680        .build()
681        .context("build reqwest client for openrouter_chat")?;
682    let body = ChatRequest {
683        model,
684        messages: &messages,
685        stream: false,
686    };
687    let resp = client
688        .post(OPENROUTER_URL)
689        .bearer_auth(api_key)
690        .header("HTTP-Referer", HTTP_REFERER)
691        .header("X-Title", X_TITLE)
692        .json(&body)
693        .send()
694        .await
695        .context("POST openrouter chat completions")?;
696    let status = resp.status();
697    if !status.is_success() {
698        let text = resp.text().await.unwrap_or_default();
699        return Err(anyhow!("openrouter HTTP {status}: {text}"));
700    }
701    let payload: ChatResponse = resp.json().await.context("decode openrouter response")?;
702    payload
703        .choices
704        .into_iter()
705        .next()
706        .map(|c| c.message.content)
707        .ok_or_else(|| anyhow!("openrouter returned no choices"))
708}
709
710/// Stream chat-completion deltas from OpenRouter through a tokio mpsc channel.
711///
712/// Why: `chat` UIs want incremental tokens for a responsive feel; the
713/// streaming endpoint emits SSE `data:` frames with delta content.
714/// What: POSTs the request with `stream: true`, parses each SSE `data:` line
715/// as a JSON object, extracts `choices[0].delta.content`, and sends each
716/// non-empty chunk to `tx`. The function returns when the stream terminates
717/// (either by `[DONE]` sentinel or by upstream EOF).
718/// Test: integration-only (no offline mock); covered manually via the
719/// trusty-search `/chat` endpoint that re-uses this helper.
720#[deprecated(since = "0.3.1", note = "Use OpenRouterProvider::chat_stream instead")]
721pub async fn openrouter_chat_stream(
722    api_key: &str,
723    model: &str,
724    messages: Vec<ChatMessage>,
725    tx: tokio::sync::mpsc::Sender<String>,
726) -> Result<()> {
727    use futures_util::StreamExt;
728
729    if api_key.is_empty() {
730        return Err(anyhow!("openrouter api key is empty"));
731    }
732    let client = reqwest::Client::builder()
733        .connect_timeout(std::time::Duration::from_secs(
734            OPENROUTER_CONNECT_TIMEOUT_SECS,
735        ))
736        .timeout(std::time::Duration::from_secs(
737            OPENROUTER_REQUEST_TIMEOUT_SECS,
738        ))
739        .build()
740        .context("build reqwest client for openrouter_chat_stream")?;
741    let body = ChatRequest {
742        model,
743        messages: &messages,
744        stream: true,
745    };
746    let resp = client
747        .post(OPENROUTER_URL)
748        .bearer_auth(api_key)
749        .header("HTTP-Referer", HTTP_REFERER)
750        .header("X-Title", X_TITLE)
751        .json(&body)
752        .send()
753        .await
754        .context("POST openrouter chat completions (stream)")?;
755    let status = resp.status();
756    if !status.is_success() {
757        let text = resp.text().await.unwrap_or_default();
758        return Err(anyhow!("openrouter HTTP {status}: {text}"));
759    }
760
761    let mut buf = String::new();
762    let mut stream = resp.bytes_stream();
763    while let Some(chunk) = stream.next().await {
764        let bytes = chunk.context("read openrouter stream chunk")?;
765        let text = match std::str::from_utf8(&bytes) {
766            Ok(s) => s,
767            Err(_) => continue,
768        };
769        buf.push_str(text);
770
771        while let Some(idx) = buf.find('\n') {
772            let line: String = buf.drain(..=idx).collect();
773            let line = line.trim();
774            let Some(payload) = line.strip_prefix("data:").map(str::trim) else {
775                continue;
776            };
777            if payload.is_empty() || payload == "[DONE]" {
778                continue;
779            }
780            let v: serde_json::Value = match serde_json::from_str(payload) {
781                Ok(v) => v,
782                Err(_) => continue,
783            };
784            if let Some(delta) = v
785                .get("choices")
786                .and_then(|c| c.get(0))
787                .and_then(|c| c.get("delta"))
788                .and_then(|d| d.get("content"))
789                .and_then(|c| c.as_str())
790                && !delta.is_empty()
791                && tx.send(delta.to_string()).await.is_err()
792            {
793                // Receiver dropped — caller has lost interest.
794                return Ok(());
795            }
796        }
797    }
798    Ok(())
799}
800
801// ─── Misc helpers ─────────────────────────────────────────────────────────
802
/// Report whether `path` names an existing directory.
///
/// Why: a small shim that reads better at call sites than
/// `path.exists() && path.is_dir()`.
/// What: looks up the path's metadata and returns `true` only when that
/// lookup succeeds and reports a directory; any metadata error (including a
/// missing path) yields `false`.
/// Test: `is_dir_recognises_directories`.
pub fn is_dir(path: &Path) -> bool {
    match std::fs::metadata(path) {
        Ok(meta) => meta.is_dir(),
        Err(_) => false,
    }
}
812
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::{Mutex, MutexGuard};

    /// Serialises tests that mutate the `TRUSTY_DATA_DIR_OVERRIDE` env var so
    /// they don't race when `cargo test` runs them in parallel threads.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// RAII scope for the data-dir override.
    ///
    /// Why: setting and clearing the env var by hand leaves it set whenever an
    /// assertion panics in between, which then leaks into later tests.
    /// What: `set` takes `ENV_LOCK` and points the override at `base`; dropping
    /// the guard removes the variable and only then releases the lock (the
    /// `Drop` body runs before the `_lock` field is dropped).
    struct DataDirOverride {
        _lock: MutexGuard<'static, ()>,
    }

    impl DataDirOverride {
        fn set(base: &std::path::Path) -> Self {
            // A poisoned lock only means an earlier test panicked; the `()`
            // payload cannot be left in a bad state, so recover the guard.
            let lock = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
            // SAFETY: `ENV_LOCK` is held for as long as the override is set,
            // so no other test in this module mutates the variable
            // concurrently. Threads that do not take the lock may still read
            // the environment; that residual risk is accepted for test-only
            // code.
            unsafe {
                std::env::set_var(DATA_DIR_OVERRIDE_ENV, base);
            }
            Self { _lock: lock }
        }
    }

    impl Drop for DataDirOverride {
        fn drop(&mut self) {
            // SAFETY: `ENV_LOCK` is still held here; see `DataDirOverride::set`.
            unsafe {
                std::env::remove_var(DATA_DIR_OVERRIDE_ENV);
            }
        }
    }

    #[tokio::test]
    async fn auto_port_walks_forward() {
        // Bind to an OS-chosen port, then ask auto-port to start there.
        let occupied = TcpListener::bind("127.0.0.1:0").await.unwrap();
        let port = occupied.local_addr().unwrap().port();
        let addr: SocketAddr = format!("127.0.0.1:{port}").parse().unwrap();
        let next = bind_with_auto_port(addr, 8).await.unwrap();
        let got = next.local_addr().unwrap().port();
        assert_ne!(got, port, "expected walk-forward to a different port");
    }

    #[tokio::test]
    async fn auto_port_zero_attempts_still_binds_free() {
        let addr: SocketAddr = "127.0.0.1:0".parse().unwrap();
        let l = bind_with_auto_port(addr, 0).await.unwrap();
        assert!(l.local_addr().unwrap().port() > 0);
    }

    #[test]
    fn resolve_data_dir_creates_directory() {
        // Use the override env var so we deterministically control the base
        // directory cross-platform (macOS's dirs::data_dir ignores HOME).
        let tmp = tempfile_like_dir();
        let _override = DataDirOverride::set(&tmp);
        let dir = resolve_data_dir("trusty-test-xyz").unwrap();
        assert!(
            dir.exists(),
            "data dir should be created at {}",
            dir.display()
        );
        assert!(dir.is_dir());
        assert!(
            dir.starts_with(&tmp),
            "data dir {} should live under override {}",
            dir.display(),
            tmp.display()
        );
    }

    #[test]
    fn daemon_addr_round_trips() {
        let tmp = tempfile_like_dir();
        let _override = DataDirOverride::set(&tmp);
        let app = format!("trusty-test-daemon-{}", unique_suffix());
        write_daemon_addr(&app, "127.0.0.1:12345").unwrap();
        let got = read_daemon_addr(&app).unwrap();
        assert_eq!(got.as_deref(), Some("127.0.0.1:12345"));
    }

    #[test]
    fn read_daemon_addr_missing_returns_none() {
        let tmp = tempfile_like_dir();
        let _override = DataDirOverride::set(&tmp);
        let app = format!("trusty-test-daemon-missing-{}", unique_suffix());
        let got = read_daemon_addr(&app).unwrap();
        assert!(got.is_none(), "expected None when file absent, got {got:?}");
    }

    #[test]
    fn is_dir_recognises_directories() {
        let tmp = tempfile_like_dir();
        assert!(is_dir(&tmp));
        assert!(!is_dir(&tmp.join("nope")));
    }

    #[test]
    fn chat_message_round_trips() {
        let m = ChatMessage {
            role: "user".into(),
            content: "hello".into(),
            tool_call_id: None,
            tool_calls: None,
        };
        let s = serde_json::to_string(&m).unwrap();
        // `None` tool fields are skipped on the wire, not emitted as `null`.
        assert!(
            !s.contains("tool_call"),
            "optional tool fields should be omitted, got {s}"
        );
        let back: ChatMessage = serde_json::from_str(&s).unwrap();
        assert_eq!(back.role, "user");
        assert_eq!(back.content, "hello");
        assert!(back.tool_call_id.is_none());
        assert!(back.tool_calls.is_none());
    }

    #[tokio::test]
    #[allow(deprecated)]
    async fn openrouter_chat_rejects_empty_key() {
        let err = openrouter_chat("", "x", vec![]).await.unwrap_err();
        assert!(err.to_string().contains("api key"));
    }

    #[tokio::test]
    #[allow(deprecated)]
    async fn openrouter_chat_stream_rejects_empty_key() {
        // The empty-key guard fires before any network work, so this stays
        // offline.
        let (tx, _rx) = tokio::sync::mpsc::channel::<String>(1);
        let err = openrouter_chat_stream("", "x", vec![], tx)
            .await
            .unwrap_err();
        assert!(err.to_string().contains("api key"));
    }

    #[tokio::test]
    async fn check_already_running_returns_none_when_file_missing() {
        // Why: a fresh machine (no prior daemon) must skip the probe entirely
        // and let the caller proceed with normal startup.
        let tmp = tempfile_like_dir();
        let missing = tmp.join("does-not-exist");
        let got = check_already_running(&missing, "/health").await;
        assert!(got.is_none());
    }

    #[tokio::test]
    async fn check_already_running_returns_none_when_file_empty() {
        // Why: a half-written / truncated address file should be treated as
        // "no daemon" and the stale file cleared so the next start does not
        // see it again.
        let tmp = tempfile_like_dir();
        let path = tmp.join("http_addr");
        std::fs::write(&path, "   \n  ").unwrap();
        let got = check_already_running(&path, "/health").await;
        assert!(got.is_none());
        assert!(
            !path.exists(),
            "empty address file should be cleaned up by check_already_running"
        );
    }

    #[tokio::test]
    async fn check_already_running_returns_none_when_address_dead() {
        // Why: a stale address (daemon previously crashed) must NOT block a
        // fresh start; the helper must probe, see no listener, clear the file,
        // and report "no daemon".
        let tmp = tempfile_like_dir();
        let path = tmp.join("http_addr");
        // Reserved unbound port — TCP connect will fail fast.
        std::fs::write(&path, "127.0.0.1:1\n").unwrap();
        let got = check_already_running(&path, "/health").await;
        assert!(got.is_none(), "dead address should map to None");
        assert!(
            !path.exists(),
            "stale address file should be cleaned up by check_already_running"
        );
    }

    #[tokio::test]
    async fn check_already_running_returns_url_when_health_ok() {
        // Why: positive control — when a daemon really is listening and
        // returns 2xx on the health path, the helper must report its URL so
        // the caller can refuse to spawn a duplicate.
        // What: spin up a one-shot mini HTTP server on an ephemeral port that
        // answers `GET /health → 200`, write the address to the file, and
        // confirm the helper returns the expected URL.
        let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
        let local = listener.local_addr().unwrap();
        let server = tokio::spawn(async move {
            use tokio::io::{AsyncReadExt, AsyncWriteExt};
            if let Ok((mut sock, _)) = listener.accept().await {
                let mut buf = [0u8; 1024];
                let _ = sock.read(&mut buf).await;
                let _ = sock
                    .write_all(b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nok")
                    .await;
                let _ = sock.shutdown().await;
            }
        });

        let tmp = tempfile_like_dir();
        let path = tmp.join("http_addr");
        std::fs::write(&path, format!("{local}\n")).unwrap();

        let got = check_already_running(&path, "/health").await;
        assert_eq!(got.as_deref(), Some(format!("http://{local}").as_str()));
        assert!(
            path.exists(),
            "address file must be preserved when the daemon is healthy"
        );
        let _ = server.await;
    }

    // Test-only helper: a suffix that is unique within this test process.
    // The pid and wall-clock nanos keep separate runs apart; the atomic
    // counter keeps parallel tests apart even when the clock is too coarse to
    // tell two calls from each other.
    fn unique_suffix() -> String {
        static SEQ: AtomicUsize = AtomicUsize::new(0);
        let seq = SEQ.fetch_add(1, Ordering::Relaxed);
        let pid = std::process::id();
        let nanos = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|d| d.as_nanos())
            .unwrap_or(0);
        format!("{pid}-{nanos}-{seq}")
    }

    // Test-only helper: makes a unique scratch dir without pulling in tempfile
    // as a dev-dep (keeps the dependency surface minimal).
    fn tempfile_like_dir() -> PathBuf {
        let p = std::env::temp_dir().join(format!("trusty-common-test-{}", unique_suffix()));
        std::fs::create_dir_all(&p).unwrap();
        p
    }
}