nemo_flow_adaptive/acg/openai_plugin.rs
1// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//! OpenAI cache plugin for the Adaptive Cache Governor (ACG) system.
5//!
6//! Maximizes automatic prefix cache hits through deterministic JSON
7//! serialization. OpenAI uses automatic prefix caching at 1024+ tokens
8//! with exact prefix matching -- no explicit annotations are needed.
9//! The plugin's job is to ensure that semantically identical prefixes
10//! produce byte-identical JSON so the cache hits rather than misses.
11//!
12//! Implements the [`ProviderPlugin`] trait with:
13//!
14//! - **Tool schema canonicalization**: RFC 8785 via [`canonicalize_value`]
15//! for deterministic key ordering in function parameter schemas.
16//! - **Stable message content canonicalization**: Structured JSON content
17//! blocks in the stable prefix are canonicalized for byte-identical output.
18//! - **No annotations injected**: OpenAI handles caching automatically.
19//!
20//! # Threat mitigations
21//!
22//! - T-08-06: RFC 8785 is a semantic-preserving transform (only reorders keys,
23//! normalizes numbers). The plugin canonicalizes tool schemas (structured JSON)
24//! but does NOT modify text content in messages.
25//! - T-08-09: If canonicalization fails for one tool, the plugin reports Degraded
26//! (not Applied) and continues with remaining tools.
27
28use crate::acg::capability::{BackendCapabilities, CapabilityRegistry};
29use crate::acg::plugin::{
30 HintPlanApplier, PluginInput, PluginOutput, ProviderPlugin, translate_with_hint_plan,
31};
32use crate::acg::prompt_ir::PromptIR;
33use crate::acg::translation::openai::OpenAIHintTranslator;
34use crate::acg::translation::{HintPlan, HintTranslation, HintTranslator};
35
36// ===================================================================
37// OpenAICachePlugin
38// ===================================================================
39
/// OpenAI-specific provider plugin for deterministic JSON serialization.
///
/// Ensures that semantically identical request prefixes produce
/// byte-identical JSON output, maximizing OpenAI's automatic prefix
/// cache hit rate.
///
/// The type is a field-less unit struct: it carries no state and needs no
/// constructor arguments. It derives `Debug`, `Clone`, `Copy`, and `Default`
/// so it can be logged with `{:?}`, duplicated freely, and default-constructed
/// by generic code.
///
/// # Usage
///
/// ```rust,ignore
/// let plugin = OpenAICachePlugin;
/// let output = plugin.translate(&input)?;
/// ```
#[derive(Debug, Clone, Copy, Default)]
pub struct OpenAICachePlugin;
53
54impl OpenAICachePlugin {
55 #[cfg_attr(not(test), allow(dead_code))]
56 pub(crate) fn build_hint_translation(
57 &self,
58 input: &PluginInput<'_>,
59 ) -> crate::acg::error::Result<HintTranslation> {
60 let translator = OpenAIHintTranslator;
61 translator.translate(input)
62 }
63}
64
65impl ProviderPlugin for OpenAICachePlugin {
66 fn plugin_id(&self) -> &str {
67 "openai"
68 }
69
70 fn plugin_name(&self) -> &str {
71 "OpenAI Cache Plugin"
72 }
73
74 fn translate(&self, input: &PluginInput<'_>) -> crate::acg::error::Result<PluginOutput> {
75 let translator = OpenAIHintTranslator;
76 translate_with_hint_plan(&translator, self, input)
77 }
78
79 fn capabilities(&self) -> BackendCapabilities {
80 CapabilityRegistry::with_defaults()
81 .get_backend("openai")
82 .cloned()
83 .unwrap_or_else(|| BackendCapabilities::none("openai"))
84 }
85}
86
87impl HintPlanApplier for OpenAICachePlugin {
88 fn apply_hint_plan(
89 &self,
90 request: &nemo_flow::api::llm::LlmRequest,
91 prompt_ir: &PromptIR,
92 hint_plan: &HintPlan,
93 ) -> crate::acg::error::Result<nemo_flow::api::llm::LlmRequest> {
94 crate::acg::request_surfaces::apply_request_surface(
95 self.plugin_id(),
96 request,
97 prompt_ir,
98 hint_plan,
99 )
100 }
101}
102
103#[cfg(test)]
104#[path = "../../tests/unit/acg/openai_plugin_tests.rs"]
105mod tests;