Skip to main content

entelix_core/
llm_facing.rs

//! LLM-facing channel — type-level separation of operator-facing
//! diagnostics from the value the model actually sees (invariant #16).
//!
//! Two surfaces, both narrowly defined:
//!
//! - [`LlmRenderable`] — `render_for_llm()` returns the raw model-facing
//!   value; `for_llm()` wraps it in a sealed [`RenderedForLlm`] carrier
//!   so emit sites cannot fabricate model-facing content without
//!   passing through a registered impl. Implementors keep prose brief,
//!   omit operator-only context (status codes, type-system
//!   identifiers, source chains), and never echo input payloads —
//!   those are prompt-injection vectors.
//! - [`LlmFacingSchema`] — `strip(&Value) -> Value` reduces a JSON
//!   Schema to the keys vendor APIs actually consume (`type`,
//!   `properties`, `required`, `items`, `enum`, `description`,
//!   bounds…). Schemars-generated knobs (`$schema`, `title`,
//!   `$defs`, `$ref`, format specifiers like `int64`) are dropped.
//!   Saves 30–120 tokens per tool per request × every turn.
//!
//! ## Why the sealed carrier
//!
//! Errors, future sub-agent results, approval decisions, and
//! memory-recall summaries all flow through the same funnel toward
//! the model's context window. Without a sealed carrier any
//! `String`-typed field can be fabricated by external code — a
//! reviewer reading an emit site cannot distinguish "this string
//! went through the LLM-facing rendering" from "this string was
//! built directly from operator content". Wrapping the value in
//! `RenderedForLlm<T>` whose constructor is private to this
//! module makes the boundary structural: the only path from value
//! to carrier is the trait's default `for_llm` impl, which wraps
//! the implementer's `render_for_llm` output. A subtype that
//! tries to override `for_llm` cannot reach `RenderedForLlm::new`,
//! so the sealing holds across crate boundaries.
//!
//! ## Why a separate trait rather than a method on `Error`
//!
//! The split lets non-`Error` types (custom tool error wrappers, MCP
//! server errors lifted into IR, future sub-agent result types) opt
//! into the same contract without coupling to `entelix_core::Error`.
//! Default impls on `Error` and `String`/`&str` cover the common
//! cases; bespoke implementors override `render_for_llm` only.
//!
//! ## Enforcement
//!
//! `crates/entelix-tools/tests/llm_context_economy.rs` regression-checks
//! that built-in tool outputs and tool-spec schemas never leak the
//! forbidden patterns. CI rejects new sites silently re-introducing
//! operator-channel content into the model's view.
50
51use std::collections::BTreeMap;
52
53use serde_json::{Map, Value};
54
55use crate::error::Error;
56
/// Sealed wrapper around a model-facing value of type `T`.
///
/// The constructor is `pub(crate)`, so code outside this crate cannot
/// build a carrier directly: it has to obtain one from
/// [`LlmRenderable::for_llm`], whose default body wraps the output of
/// `render_for_llm`. The `Deserialize` impl in this module is the
/// other caller of the constructor — it rebuilds the carrier around
/// a previously persisted inner value.
///
/// The wrapper stores nothing besides the inner value: the boundary
/// it marks is authorship, not provenance. Read the value with
/// [`Self::as_inner`], or take it back with [`Self::into_inner`] when
/// it has to be forwarded to another sink.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct RenderedForLlm<T>(T);

impl<T> RenderedForLlm<T> {
    /// Crate-private constructor. Keeping this `pub(crate)` is what
    /// seals the carrier against direct construction elsewhere.
    pub(crate) const fn new(inner: T) -> Self {
        Self(inner)
    }

    /// Shared view of the wrapped model-facing value.
    #[must_use]
    pub const fn as_inner(&self) -> &T {
        let Self(inner) = self;
        inner
    }

    /// Unwrap the carrier, handing ownership of the value back.
    #[must_use]
    pub fn into_inner(self) -> T {
        let Self(inner) = self;
        inner
    }
}
96
97impl<T: AsRef<str>> AsRef<str> for RenderedForLlm<T> {
98    fn as_ref(&self) -> &str {
99        self.0.as_ref()
100    }
101}
102
103impl<T: std::fmt::Display> std::fmt::Display for RenderedForLlm<T> {
104    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
105        self.0.fmt(f)
106    }
107}
108
109impl<T> serde::Serialize for RenderedForLlm<T>
110where
111    T: serde::Serialize,
112{
113    fn serialize<S: serde::Serializer>(&self, ser: S) -> std::result::Result<S::Ok, S::Error> {
114        self.0.serialize(ser)
115    }
116}
117
118impl<'de, T> serde::Deserialize<'de> for RenderedForLlm<T>
119where
120    T: serde::Deserialize<'de>,
121{
122    fn deserialize<D: serde::Deserializer<'de>>(de: D) -> std::result::Result<Self, D::Error> {
123        // Audit-log replay paths (re-load `AgentEvent::ToolError`
124        // events from a `SessionLog`) must reconstruct the carrier
125        // around its persisted inner value. The persisted value
126        // already passed `for_llm` on first emit (invariant 18 —
127        // events are the SSoT), so deserialising into the carrier
128        // is the inverse, not a fresh fabrication.
129        T::deserialize(de).map(Self::new)
130    }
131}
132
133/// Render a value (typically an error, sub-agent result, or
134/// memory-recall summary) into the short, actionable form the
135/// model is allowed to see. Implementors define
136/// [`Self::render_for_llm`] (the raw producer); the default
137/// [`Self::for_llm`] wraps the result in a sealed
138/// [`RenderedForLlm`] carrier whose constructor is private to this
139/// crate, so emit sites that accept the carrier receive a value
140/// that structurally went through the trait.
141///
142/// Implementations keep prose brief, omit operator-only context
143/// (status codes, type-system identifiers, source chains), and
144/// never echo input payloads — those are prompt-injection vectors.
145/// The full operator-facing form continues to flow through
146/// `Display` / `Error::source` / event sinks / OTel.
147pub trait LlmRenderable<T> {
148    /// The raw model-facing rendering. Must not include vendor
149    /// status codes, `provider returned …` framing, source chains,
150    /// RFC3339 timestamps, or internal type names — operator
151    /// channels carry those.
152    fn render_for_llm(&self) -> T;
153
154    /// Sealed carrier wrapping [`Self::render_for_llm`]'s output.
155    /// External crates that implement this trait cannot override
156    /// this method without access to `RenderedForLlm::new`, which
157    /// is `pub(crate)` to `entelix-core`. The boundary therefore
158    /// holds across crate boundaries: only `entelix-core`'s default
159    /// impl can produce a `RenderedForLlm<T>`.
160    fn for_llm(&self) -> RenderedForLlm<T> {
161        RenderedForLlm::new(self.render_for_llm())
162    }
163}
164
165// `use_self` would prefer `Self` in place of `String` here, but the
166// trait param `String` and the receiver type `String` are
167// fundamentally the same in this two-parameter `LlmRenderable<T>`
168// shape — substituting `Self` reads worse than the explicit form.
169#[allow(clippy::use_self)]
170impl LlmRenderable<String> for String {
171    /// Identity rendering. The seal still holds — `for_llm()`'s
172    /// default impl (the only path to `RenderedForLlm::new`) routes
173    /// every emit through this trait, even when the operator's hint
174    /// is already a plain string. Validators raising
175    /// `Error::ModelRetry` thus write
176    /// `"corrective text".to_owned().for_llm()` and the type system
177    /// confirms the rendering boundary was crossed.
178    fn render_for_llm(&self) -> String {
179        self.clone()
180    }
181}
182
183impl LlmRenderable<String> for &str {
184    fn render_for_llm(&self) -> String {
185        (*self).to_owned()
186    }
187}
188
189impl LlmRenderable<String> for Error {
190    /// Short, model-actionable rendering. Mapping:
191    ///
192    /// - `InvalidRequest(msg)` → `"invalid input: {msg}"` — the
193    ///   message is already caller-supplied and free of vendor
194    ///   identifiers.
195    /// - `Provider { .. }` → `"upstream model error"` — vendor
196    ///   status is operator-only.
197    /// - `Auth(_)` → `"authentication failed"` — never echo the
198    ///   underlying provider's auth diagnostic.
199    /// - `Config(_)` → `"tool misconfigured"` — operator must fix.
200    /// - `Cancelled` → `"cancelled"`.
201    /// - `DeadlineExceeded` → `"timed out"`.
202    /// - `Interrupted { .. }` → `"awaiting human review"`.
203    /// - `Serde(_)` → `"output could not be serialised"` — the
204    ///   inner serde error names internal types.
205    fn render_for_llm(&self) -> String {
206        match self {
207            Self::InvalidRequest(msg) => format!("invalid input: {msg}"),
208            Self::Provider { .. } => "upstream model error".to_owned(),
209            Self::Auth(_) => "authentication failed".to_owned(),
210            Self::Config(_) => "tool misconfigured".to_owned(),
211            Self::Cancelled => "cancelled".to_owned(),
212            Self::DeadlineExceeded => "timed out".to_owned(),
213            Self::Interrupted { .. } => "awaiting human review".to_owned(),
214            Self::Serde(_) => "output could not be serialised".to_owned(),
215            // Usage-limit breaches are operational signals — the
216            // model does not need budget visibility (and exposing
217            // it would invite the model to plan around limits).
218            Self::UsageLimitExceeded(_) => "request quota reached".to_owned(),
219            // `ModelRetry` carries an already-rendered hint by
220            // construction — surface that text verbatim. The retry
221            // loop catches the variant before LLM emission in normal
222            // flow; this branch covers leaks past the loop boundary.
223            Self::ModelRetry { hint, .. } => hint.as_inner().clone(),
224        }
225    }
226}
227
/// JSON-Schema sanitiser — strips schemars / draft-meta keys that
/// vendor APIs ignore but that still cost tokens to ship.
///
/// A field-less namespace type: the functionality lives in its
/// associated functions. It derives `Debug` (plus `Clone`/`Copy`)
/// so it can appear inside other `Debug` types and in diagnostics.
#[derive(Clone, Copy, Debug)]
pub struct LlmFacingSchema;
231
/// JSON-Schema key classification — drives the schema-aware walk.
///
/// Different keys hold different *kinds* of value: some carry literal
/// data (`type: "string"`, `description: "..."`), some carry a
/// nested schema (`items`, `additionalProperties` when an object),
/// some carry an array of schemas (`anyOf`, `oneOf`, `allOf`), some
/// carry a `map<user-name, schema>` (`properties`), and some carry
/// user data that must not be schema-walked (`enum`, `default`,
/// `const`, `required`). The classifier picks the right walk for
/// each key so user-named properties survive the strip and user
/// values are not accidentally pruned to empty objects.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum AllowedKey {
    /// Literal value — `type`, `description`, bounds, `format`, …
    /// Cloned through (with the `format` noise filter applied).
    Literal,
    /// Nested schema — `items`, `additionalProperties`, `not`.
    /// The walk for this kind also accepts an array of schemas
    /// (tuple-style `items`) and non-object values such as the
    /// boolean form of `additionalProperties`.
    Schema,
    /// Array of nested schemas — `anyOf`, `oneOf`, `allOf`. A
    /// non-array value under one of these keys is kept unchanged.
    SchemaArray,
    /// Map of user-named entries to schemas — `properties`. Keys
    /// are preserved verbatim; values are schema-walked.
    SchemaMap,
    /// User data — `enum`, `default`, `const`, `required`. Cloned
    /// verbatim; never schema-walked.
    UserData,
}
260
261fn classify(key: &str) -> Option<AllowedKey> {
262    Some(match key {
263        "type" | "description" | "minimum" | "maximum" | "exclusiveMinimum"
264        | "exclusiveMaximum" | "minLength" | "maxLength" | "minItems" | "maxItems"
265        | "uniqueItems" | "minProperties" | "maxProperties" | "pattern" | "format" => {
266            AllowedKey::Literal
267        }
268        "items" | "additionalProperties" | "not" => AllowedKey::Schema,
269        "anyOf" | "oneOf" | "allOf" => AllowedKey::SchemaArray,
270        "properties" => AllowedKey::SchemaMap,
271        "enum" | "default" | "const" | "required" => AllowedKey::UserData,
272        _ => return None,
273    })
274}
275
/// `format` values that read as noise to the vendor — the
/// JSON-Schema-encoded width hint is already implied by
/// `type: "integer"`/`"number"` and the model gains nothing from
/// seeing it. Removing them shrinks the wire without losing meaning.
///
/// Besides the fixed 8–64-bit widths, the list covers the hints
/// schemars emits for `i128`/`u128` (`int128`/`uint128`) and for
/// `isize`/`usize` (`int`/`uint`).
const NOISY_FORMATS: &[&str] = &[
    "int8", "int16", "int32", "int64", "int128", "int", "uint8", "uint16", "uint32", "uint64",
    "uint128", "uint", "float", "double",
];
283
284impl LlmFacingSchema {
285    /// Walk `schema` and return a copy containing only
286    /// vendor-relevant keys. The walk inlines `$ref`/`$defs`
287    /// indirection so the resulting schema is self-contained — no
288    /// dangling references, no draft-meta envelope.
289    #[must_use]
290    pub fn strip(schema: &Value) -> Value {
291        let defs = collect_defs(schema);
292        strip_schema(schema, &defs)
293    }
294}
295
296fn collect_defs(schema: &Value) -> BTreeMap<String, Value> {
297    let mut out = BTreeMap::new();
298    if let Some(obj) = schema.as_object() {
299        // Merge `$defs` (2020-12) and the legacy `definitions` key.
300        for key in ["$defs", "definitions"] {
301            if let Some(Value::Object(defs)) = obj.get(key) {
302                for (name, body) in defs {
303                    out.insert(name.clone(), body.clone());
304                }
305            }
306        }
307    }
308    out
309}
310
311/// Strip one schema node. Resolves `$ref` indirection up front, then
312/// dispatches each surviving key according to its [`AllowedKey`]
313/// classification.
314fn strip_schema(node: &Value, defs: &BTreeMap<String, Value>) -> Value {
315    let Some(obj) = node.as_object() else {
316        // Not an object (likely a boolean schema like
317        // `additionalProperties: false` or an `items: true` shorthand)
318        // — clone through unchanged.
319        return node.clone();
320    };
321
322    // `$ref` short-circuits — replace the whole node with the
323    // stripped definition body. Eliminates `$defs` indirection.
324    if let Some(Value::String(reference)) = obj.get("$ref")
325        && let Some(name) = reference
326            .strip_prefix("#/$defs/")
327            .or_else(|| reference.strip_prefix("#/definitions/"))
328        && let Some(target) = defs.get(name)
329    {
330        return strip_schema(target, defs);
331    }
332
333    let mut out = Map::new();
334    for (key, value) in obj {
335        let Some(kind) = classify(key) else {
336            continue;
337        };
338        match kind {
339            AllowedKey::Literal => {
340                if key == "format"
341                    && let Some(format) = value.as_str()
342                    && NOISY_FORMATS.contains(&format)
343                {
344                    continue;
345                }
346                out.insert(key.clone(), value.clone());
347            }
348            AllowedKey::Schema => {
349                // `items` may be a single schema or an array of
350                // schemas (tuple-style validation); `additionalProperties`
351                // may be a boolean. Dispatch per shape.
352                let stripped = match value {
353                    Value::Array(arr) => {
354                        Value::Array(arr.iter().map(|v| strip_schema(v, defs)).collect())
355                    }
356                    other => strip_schema(other, defs),
357                };
358                out.insert(key.clone(), stripped);
359            }
360            AllowedKey::SchemaArray => {
361                if let Value::Array(arr) = value {
362                    let stripped: Vec<Value> = arr.iter().map(|v| strip_schema(v, defs)).collect();
363                    out.insert(key.clone(), Value::Array(stripped));
364                } else {
365                    // Malformed — keep the original; the vendor will
366                    // reject it with a clearer error than we can
367                    // synthesize here.
368                    out.insert(key.clone(), value.clone());
369                }
370            }
371            AllowedKey::SchemaMap => {
372                // User-named keys → preserve verbatim, values → walk.
373                if let Value::Object(map) = value {
374                    let stripped: Map<String, Value> = map
375                        .iter()
376                        .map(|(k, v)| (k.clone(), strip_schema(v, defs)))
377                        .collect();
378                    out.insert(key.clone(), Value::Object(stripped));
379                } else {
380                    out.insert(key.clone(), value.clone());
381                }
382            }
383            AllowedKey::UserData => {
384                out.insert(key.clone(), value.clone());
385            }
386        }
387    }
388    Value::Object(out)
389}
390
// Unit tests for the model-facing error rendering and the schema
// stripper. `unwrap` and `Value` indexing are acceptable here: a
// panic is a test failure.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::indexing_slicing)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn render_for_llm_omits_provider_status() {
        let err = Error::provider_http(503, "vendor down".to_owned());
        let rendered = err.render_for_llm();
        assert!(!rendered.contains("503"), "{rendered}");
        assert!(!rendered.contains("vendor down"), "{rendered}");
        assert!(!rendered.contains("provider returned"), "{rendered}");
    }

    #[test]
    fn render_for_llm_invalid_request_carries_caller_message() {
        let err = Error::invalid_request("missing 'task' field");
        assert_eq!(err.render_for_llm(), "invalid input: missing 'task' field");
    }

    #[test]
    fn strip_removes_schema_envelope() {
        let raw = json!({
            "$schema": "https://json-schema.org/draft/2020-12/schema",
            "title": "DoubleInput",
            "type": "object",
            "properties": {"n": {"type": "integer", "format": "int64"}},
            "required": ["n"]
        });
        let stripped = LlmFacingSchema::strip(&raw);
        assert!(stripped.get("$schema").is_none());
        assert!(stripped.get("title").is_none());
        assert_eq!(stripped["type"], "object");
        assert_eq!(stripped["properties"]["n"]["type"], "integer");
        // int64 is the noisy width hint — dropped.
        assert!(stripped["properties"]["n"].get("format").is_none());
        assert_eq!(stripped["required"], json!(["n"]));
    }

    #[test]
    fn strip_inlines_refs_and_drops_defs_envelope() {
        let raw = json!({
            "$schema": "https://json-schema.org/draft/2020-12/schema",
            "title": "Outer",
            "type": "object",
            "properties": {"inner": {"$ref": "#/$defs/Inner"}},
            "$defs": {
                "Inner": {
                    "title": "Inner",
                    "type": "object",
                    "properties": {"x": {"type": "string"}},
                    "required": ["x"]
                }
            }
        });
        let stripped = LlmFacingSchema::strip(&raw);
        assert!(stripped.get("$defs").is_none());
        let inner = &stripped["properties"]["inner"];
        // $ref resolved → inlined object, title gone.
        assert_eq!(inner["type"], "object");
        assert_eq!(inner["properties"]["x"]["type"], "string");
        assert!(inner.get("title").is_none());
    }

    #[test]
    fn strip_resolves_legacy_definitions_refs() {
        // Older drafts keep named schemas under `definitions`; both
        // the envelope key and the `#/definitions/` pointer prefix
        // must be understood.
        let raw = json!({
            "type": "object",
            "properties": {"inner": {"$ref": "#/definitions/Inner"}},
            "definitions": {
                "Inner": {"title": "Inner", "type": "string"}
            }
        });
        let stripped = LlmFacingSchema::strip(&raw);
        assert!(stripped.get("definitions").is_none());
        let inner = &stripped["properties"]["inner"];
        assert_eq!(inner["type"], "string");
        assert!(inner.get("title").is_none());
        assert!(inner.get("$ref").is_none());
    }

    #[test]
    fn strip_preserves_user_named_property_keys() {
        // A property that happens to be called `title` is user data:
        // the name must survive even though the `title` keyword is
        // dropped at schema level.
        let raw = json!({
            "title": "Envelope",
            "type": "object",
            "properties": {
                "title": {"title": "Title", "type": "string"}
            }
        });
        let stripped = LlmFacingSchema::strip(&raw);
        assert!(stripped.get("title").is_none());
        let field = &stripped["properties"]["title"];
        assert_eq!(field["type"], "string");
        assert!(field.get("title").is_none());
    }

    #[test]
    fn strip_passes_boolean_additional_properties_through() {
        let raw = json!({
            "type": "object",
            "additionalProperties": false
        });
        let stripped = LlmFacingSchema::strip(&raw);
        assert_eq!(stripped["additionalProperties"], json!(false));
    }

    #[test]
    fn strip_keeps_meaningful_format_specifiers() {
        // `date-time`, `email`, `uri` are real vendor-honored
        // formats — the noise list only targets width hints.
        let raw = json!({
            "type": "string",
            "format": "date-time"
        });
        let stripped = LlmFacingSchema::strip(&raw);
        assert_eq!(stripped["format"], "date-time");
    }
}