Skip to main content

nemo_flow/codec/
traits.rs

1// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
//! LLM codec traits: bidirectional request translation and decode-only response parsing.
5
6use crate::api::llm::LlmRequest;
7use crate::error::Result;
8use crate::json::Json;
9
10use super::request::AnnotatedLlmRequest;
11use super::response::AnnotatedLlmResponse;
12
13// ---------------------------------------------------------------------------
14// LlmCodec trait
15// ---------------------------------------------------------------------------
16
17/// A bidirectional translator between opaque [`LlmRequest`] content and
18/// structured [`AnnotatedLlmRequest`].
19///
20/// Codecs are implemented by integration patches (LangChain, LangChain-NVIDIA,
21/// LangGraph, etc.) since each SDK has its own request format. They are
22/// registered by name in the global codec registry.
23///
24/// # Design
25///
26/// - **Synchronous**: `decode`/`encode` are pure data transforms (JSON
27///   restructuring), not I/O operations. This matches existing guardrails
28///   and request intercepts.
29/// - **`Send + Sync`**: Required because [`NemoFlowContextState`](crate::api::runtime::NemoFlowContextState)
30///   is behind `Arc<RwLock<>>` and accessed from async contexts.
31/// - **Trait object**: Codecs are registered at runtime (e.g., by Python
32///   patches), so the Rust core cannot know concrete types at compile time.
33///   Store as `Arc<dyn LlmCodec>`.
34pub trait LlmCodec: Send + Sync {
35    /// Parse opaque request content into structured form.
36    fn decode(&self, request: &LlmRequest) -> Result<AnnotatedLlmRequest>;
37
38    /// Merge structured changes back into the opaque request.
39    ///
40    /// The `original` parameter is the pre-intercept [`LlmRequest`], used to
41    /// preserve fields that the Codec does not structurally model. Implementations
42    /// MUST use merge-not-replace semantics: overlay structured changes onto
43    /// the original content, do not construct a fresh content object.
44    fn encode(&self, annotated: &AnnotatedLlmRequest, original: &LlmRequest) -> Result<LlmRequest>;
45}
46
47// ---------------------------------------------------------------------------
48// LlmResponseCodec trait
49// ---------------------------------------------------------------------------
50
51/// Decode-only codec for LLM API responses.
52///
53/// Unlike [`LlmCodec`] (which is bidirectional for requests), response codecs
54/// are introspection-only: they parse a raw response into structured form but
55/// never need to encode back. This matches the pipeline design where responses
56/// are observed, not modified.
57///
58/// # Design
59///
60/// - **Synchronous**: `decode_response` is a pure data transform (JSON parsing),
61///   not an I/O operation.
62/// - **`Send + Sync`**: Required for storage in `Arc` behind `RwLock`.
63/// - **Trait object**: Codecs are registered at runtime, stored as
64///   `Arc<dyn LlmResponseCodec>`.
65/// - **Fallible**: Returns `Result`; managed call sites may omit annotations on
66///   decode failure, while manual lifecycle bindings may surface the error.
67///
68/// # Two-Phase Decode
69///
70/// Implementations should use a two-phase decode pattern:
71/// 1. Deserialize raw JSON into API-specific intermediate structs
72/// 2. Map intermediate structs into the normalized `AnnotatedLlmResponse`
73pub trait LlmResponseCodec: Send + Sync {
74    /// Parse a raw JSON response into normalized structured form.
75    ///
76    /// Implementations should return `Err` only for genuinely unparseable input.
77    fn decode_response(&self, response: &Json) -> Result<AnnotatedLlmResponse>;
78}