iron_runtime 0.4.0

Agent runtime with LLM request routing and translation
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
//! LlmRouter - Core Rust implementation for LLM proxy
//!
//! Python bindings are provided by the `iron_sdk` crate (see ADR-010).

use std::net::TcpListener;
use std::sync::{Arc, Once};
use tokio::sync::oneshot;

use iron_cost::budget::CostController;
use iron_telemetry::{init_logging, LogLevel};

/// Initialize logging once
static LOGGING_INIT: Once = Once::new();

fn ensure_logging() {
  LOGGING_INIT.call_once(|| {
    let _ = init_logging(LogLevel::Info);
  });
}

use crate::llm_router::key_fetcher::KeyFetcher;
use crate::llm_router::proxy::{run_proxy, ProxyConfig};

#[cfg(feature = "analytics")]
use iron_runtime_analytics::EventStore;

#[cfg(feature = "analytics")]
use iron_runtime_analytics::{SyncClient, SyncConfig, SyncHandle};

/// LLM Router - Local proxy server for OpenAI/Anthropic API requests
///
/// Creates a local HTTP server that intercepts LLM API requests,
/// fetches real API keys from Iron Cage server, and forwards
/// requests to the actual provider.
///
/// # Example (Rust)
///
/// ```rust,no_run
/// use iron_runtime::llm_router::LlmRouter;
///
/// let router = LlmRouter::create(
///     "ic_xxx".to_string(),
///     "https://api.iron-cage.io".to_string(),
///     300,
/// ).expect("Failed to create router");
///
/// println!("Router listening at: {}", router.get_base_url());
/// // ... use with reqwest or other HTTP client
/// router.shutdown();
/// ```
///
/// # Python Usage
///
/// Python bindings are provided by `iron_sdk` crate:
/// ```python
/// from iron_sdk import LlmRouter
/// router = LlmRouter(api_key="ic_xxx", server_url="https://...")
/// ```
pub struct LlmRouter {
  /// Port the proxy is listening on
  port: u16,
  /// API key (IC_TOKEN)
  api_key: String,
  /// Iron Cage server URL
  #[allow(dead_code)]
  server_url: String,
  /// Auto-detected provider from API key format ("openai" or "anthropic")
  provider: String,
  /// Tokio runtime
  #[allow(dead_code)]
  runtime: tokio::runtime::Runtime,
  /// Shutdown channel
  shutdown_tx: Option<oneshot::Sender<()>>,
  /// Cost controller for budget enforcement and spending tracking (None = no budget)
  cost_controller: Option<Arc<CostController>>,
  /// Analytics event store
  #[cfg(feature = "analytics")]
  event_store: Arc<EventStore>,
  /// Agent ID for analytics attribution
  #[cfg(feature = "analytics")]
  #[allow(dead_code)]
  agent_id: Option<Arc<str>>,
  /// Provider ID for analytics attribution
  #[cfg(feature = "analytics")]
  #[allow(dead_code)]
  provider_id: Option<Arc<str>>,
  /// Analytics sync handle - auto-flushes on drop
  #[cfg(feature = "analytics")]
  #[allow(dead_code)] // Used for Drop behavior
  sync_handle: Option<SyncHandle>,
  /// Lease ID from server handshake (for budget return on shutdown)
  lease_id: Option<String>,
}

impl LlmRouter {
  /// Create a new LlmRouter instance
  ///
  /// # Arguments
  ///
  /// * `api_key` - Iron Cage API token
  /// * `server_url` - Iron Cage server URL
  /// * `cache_ttl_seconds` - How long to cache API keys (default: 300)
  ///
  /// # Returns
  ///
  /// Result with LlmRouter instance or error string
  pub fn create(
    api_key: String,
    server_url: String,
    cache_ttl_seconds: u64,
  ) -> Result<Self, String> {
    Self::create_inner(api_key, server_url, cache_ttl_seconds, None, None)
  }

  /// Create a new LlmRouter instance with budget
  ///
  /// # Arguments
  ///
  /// * `api_key` - Iron Cage API token
  /// * `server_url` - Iron Cage server URL
  /// * `cache_ttl_seconds` - How long to cache API keys
  /// * `budget` - Budget limit in USD
  pub fn create_with_budget(
    api_key: String,
    server_url: String,
    cache_ttl_seconds: u64,
    budget: f64,
  ) -> Result<Self, String> {
    Self::create_inner(api_key, server_url, cache_ttl_seconds, Some(budget), None)
  }

  /// Create a new LlmRouter instance with direct provider key
  ///
  /// Bypasses Iron Cage server - useful for testing or direct provider access
  ///
  /// # Arguments
  ///
  /// * `provider_key` - Direct provider API key (e.g., "sk-...")
  /// * `budget` - Optional budget limit in USD
  pub fn create_with_provider_key(
    provider_key: String,
    budget: Option<f64>,
  ) -> Result<Self, String> {
    Self::create_inner(
      "direct".to_string(),
      String::new(),
      0,
      budget,
      Some(provider_key),
    )
  }

  /// Create with all options
  ///
  /// # Arguments
  ///
  /// * `api_key` - Iron Cage API token (or "direct" for provider key mode)
  /// * `server_url` - Iron Cage server URL (empty for provider key mode)
  /// * `cache_ttl_seconds` - How long to cache API keys
  /// * `budget` - Optional budget limit in USD
  /// * `provider_key` - Optional direct provider API key
  pub fn create_full(
    api_key: String,
    server_url: String,
    cache_ttl_seconds: u64,
    budget: Option<f64>,
    provider_key: Option<String>,
  ) -> Result<Self, String> {
    Self::create_inner(api_key, server_url, cache_ttl_seconds, budget, provider_key)
  }

  /// Get the base URL for the OpenAI client
  ///
  /// Returns URL like "http://127.0.0.1:52431/v1"
  pub fn get_base_url(&self) -> String {
    format!("http://127.0.0.1:{}/v1", self.port)
  }

  /// Get the API key
  pub fn get_api_key(&self) -> &str {
    &self.api_key
  }

  /// Get the port the proxy is listening on
  pub fn get_port(&self) -> u16 {
    self.port
  }

  /// Get the auto-detected provider ("openai" or "anthropic")
  pub fn get_provider(&self) -> &str {
    &self.provider
  }

  /// Check if the proxy server is running
  pub fn is_running(&self) -> bool {
    self.shutdown_tx.is_some()
  }

  /// Get total spent in USD (0.0 if no budget set)
  pub fn total_spent(&self) -> f64 {
    self.cost_controller
      .as_ref()
      .map(|c| c.total_spent() as f64 / 1_000_000.0)
      .unwrap_or(0.0)
  }

  /// Set budget limit in USD
  ///
  /// # Arguments
  /// * `amount_usd` - New budget limit in USD (e.g., 10.0 for $10)
  pub fn set_budget(&self, amount_usd: f64) {
    if let Some(ref controller) = self.cost_controller {
      let budget_micros = (amount_usd * 1_000_000.0) as i64;
      controller.set_budget(budget_micros);
    }
  }

  /// Get current budget limit in USD (None if no budget set)
  pub fn get_budget(&self) -> Option<f64> {
    self.cost_controller
      .as_ref()
      .map(|c| c.budget_limit() as f64 / 1_000_000.0)
  }

  /// Get budget status as (spent, limit) tuple in USD
  /// Returns None if no budget is set
  pub fn get_budget_status(&self) -> Option<(f64, f64)> {
    self.cost_controller.as_ref().map(|c| {
      let (spent_micros, limit_micros) = c.get_status();
      (
        spent_micros as f64 / 1_000_000.0,
        limit_micros as f64 / 1_000_000.0,
      )
    })
  }

  /// Stop the proxy server
  pub fn shutdown(&mut self) {
    self.stop_inner();
  }

  /// Internal creation logic
  fn create_inner(
    api_key: String,
    server_url: String,
    cache_ttl_seconds: u64,
    budget: Option<f64>,
    provider_key: Option<String>,
  ) -> Result<Self, String> {
    // Initialize logging
    ensure_logging();

    // Find free port
    let port = find_free_port().map_err(|e| format!("Failed to find free port: {}", e))?;

    // Create tokio runtime
    let runtime = tokio::runtime::Builder::new_multi_thread()
      .worker_threads(2)
      .enable_all()
      .build()
      .map_err(|e| format!("Failed to create runtime: {}", e))?;

    // Create key fetcher - static if provider_key given, otherwise fetch from server
    let key_fetcher = Arc::new(if let Some(ref pk) = provider_key {
      KeyFetcher::new_static(pk.clone(), None)
    } else {
      KeyFetcher::new(server_url.clone(), api_key.clone(), cache_ttl_seconds)
    });

    let provider = runtime.block_on(async {
      key_fetcher
        .get_key()
        .await
        .map(|k| k.provider)
        .unwrap_or_else(|_| "unknown".to_string())
    });

    // Create shutdown channel
    let (shutdown_tx, shutdown_rx) = oneshot::channel();

    // Determine budget: use explicit budget, or fetch from server handshake, or default to 0
    // Also capture lease_id from handshake for budget return on shutdown
    // Note: API accepts dollars (f64), server returns microdollars (i64)
    let (effective_budget_micros, lease_id): (i64, Option<String>) = if let Some(b) = budget {
      // Convert dollars to microdollars
      ((b * 1_000_000.0) as i64, None)
    } else if !server_url.is_empty() {
      // Fetch budget from server handshake (Protocol 005) - already in microdollars
      match runtime.block_on(async { fetch_budget_from_handshake(&server_url, &api_key).await }) {
        Some(result) => (result.budget, Some(result.lease_id)),
        None => (0, None),
      }
    } else {
      (0, None)
    };

    // Create cost controller with effective budget (always created, defaults to 0)
    let cost_controller = Some(Arc::new(CostController::new(effective_budget_micros)));

    // Create analytics event store (feature-gated)
    #[cfg(feature = "analytics")]
    let event_store = Arc::new(EventStore::new());
    #[cfg(feature = "analytics")]
    let agent_id_arc: Option<Arc<str>> = None;
    #[cfg(feature = "analytics")]
    let provider_id_arc: Option<Arc<str>> = None;

    // Record router started event
    #[cfg(feature = "analytics")]
    event_store.record_router_started(port);

    // Create config
    let config = ProxyConfig {
      port,
      ic_token: api_key.clone(),
      server_url: server_url.clone(),
      cache_ttl_seconds,
      cost_controller: cost_controller.clone(),
      provider_key: provider_key.clone(),
      #[cfg(feature = "analytics")]
      event_store: event_store.clone(),
      #[cfg(feature = "analytics")]
      agent_id: agent_id_arc.clone(),
      #[cfg(feature = "analytics")]
      provider_id: provider_id_arc.clone(),
    };

    // Spawn proxy server
    runtime.spawn(async move {
      if let Err(e) = run_proxy(config, shutdown_rx).await {
        tracing::error!("Proxy server error: {}", e);
      }
    });

    // Start analytics sync (if server_url is provided)
    #[cfg(feature = "analytics")]
    let sync_handle = if !server_url.is_empty() {
      let sync_config = SyncConfig::new(&server_url, &api_key);
      let sync_client = SyncClient::new(event_store.clone(), sync_config);
      Some(sync_client.start(runtime.handle()))
    } else {
      None
    };

    // Wait for server to start
    std::thread::sleep(std::time::Duration::from_millis(50));

    Ok(Self {
      port,
      api_key,
      server_url,
      provider,
      runtime,
      shutdown_tx: Some(shutdown_tx),
      cost_controller,
      #[cfg(feature = "analytics")]
      event_store,
      #[cfg(feature = "analytics")]
      agent_id: agent_id_arc,
      #[cfg(feature = "analytics")]
      provider_id: provider_id_arc,
      #[cfg(feature = "analytics")]
      sync_handle,
      lease_id,
    })
  }

  fn stop_inner(&mut self) {
    if let Some(tx) = self.shutdown_tx.take() {
      // Return unused budget to server before shutting down (Protocol 005)
      if let Some(lease_id) = self.lease_id.take() {
        if !self.server_url.is_empty() {
          // Get spent amount from cost_controller (already in microdollars)
          let spent_microdollars = self
            .cost_controller
            .as_ref()
            .map(|cc| cc.total_spent())
            .unwrap_or(0);

          let url = format!("{}/api/v1/budget/return", self.server_url);
          let client = reqwest::blocking::Client::builder()
            .timeout(std::time::Duration::from_secs(5))
            .build();

          match client {
            Ok(client) => {
              match client
                .post(&url)
                .header("Content-Type", "application/json")
                .json(&serde_json::json!({"lease_id": lease_id, "spent_microdollars": spent_microdollars}))
                .send()
              {
                Ok(resp) if resp.status().is_success() => {
                  if let Ok(body) = resp.json::<serde_json::Value>() {
                    if let Some(returned) = body.get("returned").and_then(|v| v.as_i64()) {
                      tracing::info!(
                        "Budget returned to server: ${:.6} (spent: ${:.6})",
                        returned as f64 / 1_000_000.0,
                        spent_microdollars as f64 / 1_000_000.0
                      );
                    }
                  }
                }
                Ok(resp) => {
                  tracing::warn!("Budget return failed with status: {}", resp.status());
                }
                Err(e) => {
                  tracing::warn!("Budget return request failed: {}", e);
                }
              }
            }
            Err(e) => {
              tracing::warn!("Failed to create HTTP client for budget return: {}", e);
            }
          }
        }
      }

      #[cfg(feature = "analytics")]
      self.event_store.record_router_stopped();

      // Stop analytics sync (triggers flush) before stopping proxy
      #[cfg(feature = "analytics")]
      if let Some(handle) = self.sync_handle.take() {
        handle.stop(); // This triggers flush
                       // Give sync task time to complete flush
        std::thread::sleep(std::time::Duration::from_millis(500));
      }

      let _ = tx.send(());
    }
  }
}

impl Drop for LlmRouter {
  fn drop(&mut self) {
    self.stop_inner();
  }
}

/// Find an available port on localhost
fn find_free_port() -> std::io::Result<u16> {
  let listener = TcpListener::bind("127.0.0.1:0")?;
  Ok(listener.local_addr()?.port())
}

/// Result from server handshake containing budget and lease info
struct HandshakeResult {
  budget: i64, // microdollars
  lease_id: String,
}

/// Fetch budget from server handshake (Protocol 005)
async fn fetch_budget_from_handshake(server_url: &str, ic_token: &str) -> Option<HandshakeResult> {
  let client = reqwest::Client::new();
  let url = format!("{}/api/v1/budget/handshake", server_url);

  let response = match client
    .post(&url)
    .header("Authorization", format!("Bearer {}", ic_token))
    .json(&serde_json::json!({
      "ic_token": ic_token,
      "provider": "openai"
    }))
    .send()
    .await
  {
    Ok(r) => r,
    Err(e) => {
      tracing::warn!("Failed to connect to server for handshake: {}", e);
      return None;
    }
  };

  if !response.status().is_success() {
    tracing::warn!("Handshake failed with status: {}", response.status());
    return None;
  }

  #[derive(serde::Deserialize)]
  struct HandshakeResponse {
    budget_granted: i64, // microdollars
    lease_id: String,
  }

  match response.json::<HandshakeResponse>().await {
    Ok(data) => {
      tracing::info!(
        "Budget from server handshake: ${:.6} ({}μ$), lease_id: {}",
        data.budget_granted as f64 / 1_000_000.0,
        data.budget_granted,
        data.lease_id
      );
      Some(HandshakeResult {
        budget: data.budget_granted,
        lease_id: data.lease_id,
      })
    }
    Err(e) => {
      tracing::warn!("Failed to parse handshake response: {}", e);
      None
    }
  }
}