// orcs_lua/llm_command/mod.rs

//! Multi-provider LLM client for `orcs.llm()`.
//!
//! # Architecture
//!
//! Two provider families, one unified HTTP transport (reqwest):
//!
//! ```text
//! Lua: orcs.llm(prompt, opts)
//!   → Capability::LLM gate (ctx_fns / child)
//!   → llm_request_impl (Rust/reqwest)
//!       ├── Ollama:    POST {base_url}/v1/chat/completions  ─┐
//!       ├── OpenAI:    POST {base_url}/v1/chat/completions  ─┤ WireFormat::OpenAI
//!       │   (llama.cpp, vLLM, LM Studio also use this)      ─┘
//!       └── Anthropic: POST {base_url}/v1/messages           ── WireFormat::Anthropic
//! ```
//!
//! # Design Decisions
//!
//! - **Provider vs WireFormat**: `Provider` identifies the server (Ollama, OpenAI,
//!   Anthropic) with its own defaults. `WireFormat` (OpenAI or Anthropic) controls
//!   request/response serialization. Multiple providers can share a wire format.
//! - **reqwest (async)**: HTTP client bridged into sync Lua context via
//!   `tokio::task::block_in_place(|| handle.block_on(...))`. Enables future
//!   streaming support and aligns with `async-openai`'s internal transport.
//! - **Anthropic kept separate**: distinct wire format (`content[]` blocks,
//!   `x-api-key` header, `anthropic-version` header).
//!
//! # Session Management
//!
//! Conversation history is stored in-memory per Lua VM via `SessionStore` (Lua app_data).
//! - `session_id = nil` → create new session (UUID v4), return session_id in response
//! - `session_id = "existing-id"` → append to existing history and continue
//!
//! # Rate Limiting & Retry
//!
//! Automatic retry with exponential backoff for transient errors:
//! - HTTP 429: respects `Retry-After` header, falls back to exponential backoff
//! - HTTP 5xx: exponential backoff (1s, 2s, 4s, capped at 30s)
//! - Transport errors (timeout, connection reset): exponential backoff
//! - Default: 2 retries (3 total attempts), configurable via `opts.max_retries`
//!
//! # Session Persistence
//!
//! - `orcs.llm_dump_sessions()` → JSON string of all session histories
//! - `orcs.llm_load_sessions(json)` → restore sessions from JSON
//!
//! # Technical Debt
//!
//! - Streaming not supported (`stream: false` fixed)
//! - Multi-turn tool loops are implemented via `opts.resolve` in
//!   `llm_request_impl`, bounded by `opts.max_tool_turns` (default: 10)

mod provider;
pub(crate) mod resolve;
pub(crate) mod retry;
mod session;

use mlua::{Lua, Table};
use orcs_types::intent::{ContentBlock, MessageContent, StopReason};
use std::collections::HashMap;
use std::time::{Duration, Instant};

use provider::{build_request_body, build_tools_for_provider, Provider};
use resolve::{
    build_assistant_content_blocks, build_lua_result, dispatch_intents_to_results,
    parse_response_body, ResponseOrError,
};
use retry::{build_classified_error_result, build_error_result, send_with_retry, SendError};
use session::{
    append_message, build_messages, ensure_session_store, resolve_session_id,
    session_message_count, update_session, Message, SessionStore,
};

/// Default timeout in seconds for LLM requests (per HTTP request).
const DEFAULT_TIMEOUT_SECS: u64 = 120;

/// Default max_tokens for Anthropic (required field).
///
/// 8192 is chosen because code-generation tool calls (edit/write) routinely
/// exceed 4096 output tokens, causing truncated JSON and `resolve=false`.
const ANTHROPIC_DEFAULT_MAX_TOKENS: u64 = 8192;

/// Maximum response body size (10 MiB).
const MAX_BODY_SIZE: u64 = 10 * 1024 * 1024;

/// Default number of retries for transient errors (429, 5xx).
/// Two retries means three total attempts.
const DEFAULT_MAX_RETRIES: u32 = 2;

/// Base delay for exponential backoff (milliseconds).
const RETRY_BASE_DELAY_MS: u64 = 1000;

/// Maximum delay between retries (seconds); exponential backoff caps here.
const RETRY_MAX_DELAY_SECS: u64 = 30;

/// Default maximum number of tool-loop turns (resolve mode).
const DEFAULT_MAX_TOOL_TURNS: u32 = 10;

/// Turn budget reminder threshold: inject a reminder when remaining
/// turns fall to this value or below (research shows 27-60% performance
/// degradation without reminders — arXiv:2510.16786).
const TURN_REMINDER_THRESHOLD: u32 = 3;
101
// ── Shared HTTP Client ────────────────────────────────────────────────

/// Wrapper for `reqwest::Client` stored in Lua app_data.
///
/// A single shared client is reused across all LLM and HTTP requests within
/// a Lua VM, avoiding repeated TLS backend and connection pool initialization.
/// Per-request timeout is set via `RequestBuilder::timeout()`.
///
/// Newtype so the app_data lookup is keyed by this distinct type rather
/// than by `reqwest::Client` itself.
struct SharedHttpClient(reqwest::Client);
110
111/// Get or create the shared `reqwest::Client` from Lua app_data.
112pub(crate) fn get_or_init_http_client(lua: &Lua) -> Result<reqwest::Client, mlua::Error> {
113    if let Some(shared) = lua.app_data_ref::<SharedHttpClient>() {
114        return Ok(shared.0.clone());
115    }
116    let client = reqwest::Client::builder()
117        .build()
118        .map_err(|e| mlua::Error::RuntimeError(format!("failed to build HTTP client: {e}")))?;
119    lua.set_app_data(SharedHttpClient(client.clone()));
120    Ok(client)
121}
122
123// ── HIL Turn Accumulator ─────────────────────────────────────────────
124
125/// Tracks consumed tool turns across HIL re-dispatches within a single Lua VM.
126///
127/// When `hil_intents=true` and a `Suspended` error occurs, the resolve loop
128/// saves the number of consumed turns here.  On re-dispatch, the next
129/// `llm_request_impl` call reads this value as `turn_offset` so the total
130/// turn budget is preserved across HIL interruptions.
131struct HilTurnAccumulator(u32);
132
133/// Read and reset the HIL turn accumulator.  Returns the stored offset
134/// (0 if none).  The reset ensures a clean slate for the current call;
135/// only a subsequent `Suspended` will write a new value.
136fn take_hil_turn_accumulator(lua: &Lua) -> u32 {
137    lua.remove_app_data::<HilTurnAccumulator>()
138        .map_or(0, |acc| acc.0)
139}
140
141/// Save accumulated turns for the next HIL re-dispatch.
142fn set_hil_turn_accumulator(lua: &Lua, turns: u32) {
143    lua.set_app_data(HilTurnAccumulator(turns));
144}
145
146/// Preserves the session ID across HIL re-dispatches within a single Lua VM.
147///
148/// On `Suspended`, the resolve loop saves the current session ID here so the
149/// re-dispatched `llm_request_impl` call can resume the same conversation
150/// (with full message history) instead of starting a blank session.
151struct HilSessionId(String);
152
153/// Read and reset the HIL session ID.  Returns `None` if no session was saved.
154fn take_hil_session_id(lua: &Lua) -> Option<String> {
155    lua.remove_app_data::<HilSessionId>().map(|s| s.0)
156}
157
158/// Save session ID for the next HIL re-dispatch.
159fn set_hil_session_id(lua: &Lua, session_id: String) {
160    lua.set_app_data(HilSessionId(session_id));
161}
162
// ── Parsed Options ─────────────────────────────────────────────────────

/// Parsed and validated options from the Lua opts table.
#[derive(Debug)]
pub(super) struct LlmOpts {
    /// Target provider; defaults to "ollama" when unspecified.
    pub provider: Provider,
    /// Resolved base URL: opts.base_url > ORCS_LLM_BASE_URL env > provider default.
    pub base_url: String,
    /// Model name; falls back to the provider's default model.
    pub model: String,
    /// API key: opts.api_key > provider-specific env var > None.
    pub api_key: Option<String>,
    /// Optional system prompt text.
    pub system_prompt: Option<String>,
    /// Session ID for multi-turn conversations (None = create a new session).
    pub session_id: Option<String>,
    /// Sampling temperature, passed through to the provider when set.
    pub temperature: Option<f64>,
    /// Max completion tokens, passed through to the provider when set.
    pub max_tokens: Option<u64>,
    /// Per-HTTP-request timeout in seconds (default: `DEFAULT_TIMEOUT_SECS`).
    pub timeout: u64,
    /// Retry count for transient errors (default: `DEFAULT_MAX_RETRIES`).
    pub max_retries: u32,
    /// Whether to send IntentDefs as tools to the LLM (default: true).
    pub tools: bool,
    /// Whether to auto-resolve intents in Rust (default: false).
    /// When true, tool_call intents are dispatched automatically and
    /// results are fed back to the LLM in a multi-turn loop.
    /// When false, intents are returned to Lua for manual dispatch.
    pub resolve: bool,
    /// Maximum number of tool-loop turns before stopping (default: 10).
    pub max_tool_turns: u32,
    /// Whether to propagate `Suspended` errors for HIL approval instead of
    /// converting them to `tool_result` errors (default: false).
    ///
    /// When `true`, intent permission denials bubble up as
    /// `ComponentError::Suspended`, allowing the ChannelRunner to trigger
    /// the HIL approval flow.  The session is preserved with synthetic
    /// tool_results so the LLM retains full context on re-dispatch.
    ///
    /// When `false` (default), `Suspended` is caught and returned as an
    /// `is_error = true` tool_result so the LLM can adapt (e.g., delegate
    /// to an agent with the required permissions).
    pub hil_intents: bool,
    /// Wall-clock timeout in seconds for the entire resolve loop (default: None).
    ///
    /// When set, the resolve loop checks elapsed time at each iteration and
    /// returns `error_kind = "overall_timeout"` if the deadline is exceeded.
    /// Unlike `timeout` (per-HTTP-request), this covers the total operation
    /// including all LLM API calls, tool dispatches, and retries.
    pub overall_timeout: Option<u64>,
}
207
208impl LlmOpts {
209    /// Parse from Lua opts table. Missing fields use provider defaults.
210    fn from_lua(opts: Option<&Table>) -> Result<Self, String> {
211        let provider_str = opts
212            .and_then(|o| o.get::<String>("provider").ok())
213            .unwrap_or_else(|| "ollama".to_string());
214        let provider: Provider = provider_str.parse()?;
215
216        // base_url resolution: opts.base_url > ORCS_LLM_BASE_URL env > provider default
217        let base_url = opts
218            .and_then(|o| o.get::<String>("base_url").ok())
219            .or_else(|| std::env::var("ORCS_LLM_BASE_URL").ok())
220            .unwrap_or_else(|| provider.default_base_url().to_string());
221
222        let model = opts
223            .and_then(|o| o.get::<String>("model").ok())
224            .unwrap_or_else(|| provider.default_model().to_string());
225
226        // API key resolution: opts.api_key > env var > None
227        let api_key = opts
228            .and_then(|o| o.get::<String>("api_key").ok())
229            .or_else(|| {
230                provider
231                    .api_key_env()
232                    .and_then(|env_name| std::env::var(env_name).ok())
233            });
234
235        let system_prompt = opts.and_then(|o| o.get::<String>("system_prompt").ok());
236        let session_id = opts.and_then(|o| o.get::<String>("session_id").ok());
237        let temperature = opts.and_then(|o| o.get::<f64>("temperature").ok());
238        let max_tokens = opts.and_then(|o| o.get::<u64>("max_tokens").ok());
239
240        let timeout = opts
241            .and_then(|o| o.get::<u64>("timeout").ok())
242            .unwrap_or(DEFAULT_TIMEOUT_SECS);
243
244        let max_retries = opts
245            .and_then(|o| o.get::<u32>("max_retries").ok())
246            .unwrap_or(DEFAULT_MAX_RETRIES);
247
248        // Note: get::<bool>(key) coerces Lua nil → false (Lua falsiness),
249        // so missing keys would return Ok(false) instead of Err.
250        // Use Option<bool> to distinguish nil (missing) from explicit false.
251        let tools = opts
252            .and_then(|o| o.get::<Option<bool>>("tools").ok())
253            .flatten()
254            .unwrap_or(true);
255
256        let resolve = opts
257            .and_then(|o| o.get::<Option<bool>>("resolve").ok())
258            .flatten()
259            .unwrap_or(false);
260
261        let max_tool_turns = opts
262            .and_then(|o| o.get::<u32>("max_tool_turns").ok())
263            .unwrap_or(DEFAULT_MAX_TOOL_TURNS);
264
265        let hil_intents = opts
266            .and_then(|o| o.get::<Option<bool>>("hil_intents").ok())
267            .flatten()
268            .unwrap_or(false);
269
270        let overall_timeout = opts.and_then(|o| o.get::<u64>("overall_timeout").ok());
271
272        Ok(Self {
273            provider,
274            base_url,
275            model,
276            api_key,
277            system_prompt,
278            session_id,
279            temperature,
280            max_tokens,
281            timeout,
282            max_retries,
283            tools,
284            resolve,
285            max_tool_turns,
286            hil_intents,
287            overall_timeout,
288        })
289    }
290}
291
// ── Ping Implementation ───────────────────────────────────────────────

/// Default timeout in seconds for health-check pings.
///
/// Much shorter than `DEFAULT_TIMEOUT_SECS`: a ping is a reachability
/// probe, not a generation request.
const PING_TIMEOUT_SECS: u64 = 5;

297/// Executes a lightweight connectivity check against the LLM provider.
298///
299/// Sends a single HTTP GET to the provider's health endpoint and measures
300/// round-trip latency. Does **not** consume tokens or create sessions.
301///
302/// # Arguments (from Lua)
303///
304/// * `opts` - Optional table:
305///   - `provider`  - "ollama" (default), "openai", "anthropic"
306///   - `base_url`  - Provider base URL (default per provider)
307///   - `api_key`   - API key (falls back to env var)
308///   - `timeout`   - Timeout in seconds (default: 5)
309///
310/// # Returns (Lua table)
311///
312/// * `ok`         - boolean (true if HTTP response received, even non-2xx)
313/// * `provider`   - Provider name string
314/// * `base_url`   - Resolved base URL
315/// * `latency_ms` - Round-trip time in milliseconds
316/// * `status`     - HTTP status code (when response received)
317/// * `error`      - Error message (when ok=false)
318/// * `error_kind` - Error classification (when ok=false)
319pub fn llm_ping_impl(lua: &Lua, opts: Option<Table>) -> mlua::Result<Table> {
320    // Parse provider/base_url/api_key from opts (reuse LlmOpts parsing logic)
321    let provider_str = opts
322        .as_ref()
323        .and_then(|o| o.get::<String>("provider").ok())
324        .unwrap_or_else(|| "ollama".to_string());
325    let provider: Result<Provider, _> = provider_str.parse();
326    let provider = match provider {
327        Ok(p) => p,
328        Err(e) => {
329            let result = lua.create_table()?;
330            result.set("ok", false)?;
331            result.set("error", e)?;
332            result.set("error_kind", "invalid_options")?;
333            return Ok(result);
334        }
335    };
336
337    let base_url = opts
338        .as_ref()
339        .and_then(|o| o.get::<String>("base_url").ok())
340        .or_else(|| std::env::var("ORCS_LLM_BASE_URL").ok())
341        .unwrap_or_else(|| provider.default_base_url().to_string());
342
343    let api_key = opts
344        .as_ref()
345        .and_then(|o| o.get::<String>("api_key").ok())
346        .or_else(|| {
347            provider
348                .api_key_env()
349                .and_then(|env_name| std::env::var(env_name).ok())
350        });
351
352    let timeout = opts
353        .as_ref()
354        .and_then(|o| o.get::<u64>("timeout").ok())
355        .unwrap_or(PING_TIMEOUT_SECS);
356
357    // Build URL
358    let url = format!(
359        "{}{}",
360        base_url.trim_end_matches('/'),
361        provider.health_path()
362    );
363
364    // Get shared client
365    let client = get_or_init_http_client(lua)?;
366
367    // Get tokio runtime handle for async→sync bridge
368    let handle = match tokio::runtime::Handle::try_current() {
369        Ok(h) => h,
370        Err(_) => {
371            let result = lua.create_table()?;
372            result.set("ok", false)?;
373            result.set("error", "no tokio runtime available for async HTTP")?;
374            result.set("error_kind", "runtime")?;
375            return Ok(result);
376        }
377    };
378
379    // Send GET and measure latency
380    let start = std::time::Instant::now();
381
382    let mut req = client.get(&url).timeout(Duration::from_secs(timeout));
383    // Attach auth headers for providers that need them
384    match provider {
385        Provider::Ollama => {
386            // Ollama does not require authentication
387        }
388        Provider::OpenAI => {
389            if let Some(ref key) = api_key {
390                req = req.header("Authorization", format!("Bearer {}", key));
391            }
392        }
393        Provider::Anthropic => {
394            if let Some(ref key) = api_key {
395                req = req.header("x-api-key", key.as_str());
396            }
397            req = req.header("anthropic-version", "2023-06-01");
398        }
399    }
400
401    let result = lua.create_table()?;
402    result.set("provider", provider.as_str())?;
403    result.set("base_url", base_url.as_str())?;
404
405    match tokio::task::block_in_place(|| handle.block_on(req.send())) {
406        Ok(resp) => {
407            let latency = start.elapsed();
408            let status = resp.status().as_u16();
409            // ok=true means "server responded" (reachable). The caller should
410            // inspect `status` to distinguish 200 from 401/403/5xx etc.
411            result.set("ok", true)?;
412            result.set("status", status)?;
413            result.set("latency_ms", latency.as_millis() as u64)?;
414        }
415        Err(e) => {
416            let latency = start.elapsed();
417            result.set("latency_ms", latency.as_millis() as u64)?;
418
419            let (error_kind, error_msg) = retry::classify_reqwest_error(&e);
420            result.set("ok", false)?;
421            result.set("error", error_msg)?;
422            result.set("error_kind", error_kind)?;
423        }
424    }
425
426    Ok(result)
427}
428
429// ── Deny Stub ──────────────────────────────────────────────────────────
430
431/// Registers `orcs.llm` as a deny-by-default stub.
432///
433/// The real implementation is injected by `ctx_fns.rs` / `child.rs`
434/// when a `ChildContext` with `Capability::LLM` is available.
435pub fn register_llm_deny_stub(lua: &Lua, orcs_table: &Table) -> Result<(), mlua::Error> {
436    if orcs_table.get::<mlua::Function>("llm").is_err() {
437        let llm_fn = lua.create_function(|lua, _args: mlua::MultiValue| {
438            let result = lua.create_table()?;
439            result.set("ok", false)?;
440            result.set(
441                "error",
442                "llm denied: no execution context (ChildContext with Capability::LLM required)",
443            )?;
444            result.set("error_kind", "permission_denied")?;
445            Ok(result)
446        })?;
447        orcs_table.set("llm", llm_fn)?;
448    }
449
450    // llm_ping deny stub
451    if orcs_table.get::<mlua::Function>("llm_ping").is_err() {
452        let ping_fn = lua.create_function(|lua, _args: mlua::MultiValue| {
453            let result = lua.create_table()?;
454            result.set("ok", false)?;
455            result.set(
456                "error",
457                "llm_ping denied: no execution context (ChildContext with Capability::LLM required)",
458            )?;
459            result.set("error_kind", "permission_denied")?;
460            Ok(result)
461        })?;
462        orcs_table.set("llm_ping", ping_fn)?;
463    }
464
465    // Session persistence: dump all sessions to JSON string
466    let dump_fn = lua.create_function(|lua, ()| {
467        ensure_session_store(lua);
468        match lua.app_data_ref::<SessionStore>() {
469            Some(store) => serde_json::to_string(&store.0)
470                .map_err(|e| mlua::Error::RuntimeError(format!("session serialize error: {e}"))),
471            None => Ok("{}".to_string()),
472        }
473    })?;
474    orcs_table.set("llm_dump_sessions", dump_fn)?;
475
476    // Session persistence: load sessions from JSON string
477    let load_fn = lua.create_function(|lua, json_str: String| {
478        let sessions: HashMap<String, Vec<Message>> = serde_json::from_str(&json_str)
479            .map_err(|e| mlua::Error::RuntimeError(format!("session deserialize error: {e}")))?;
480        let count = sessions.len();
481        let _ = lua.remove_app_data::<SessionStore>();
482        lua.set_app_data(SessionStore(sessions));
483
484        let result = lua.create_table()?;
485        result.set("ok", true)?;
486        result.set("count", count)?;
487        Ok(result)
488    })?;
489    orcs_table.set("llm_load_sessions", load_fn)?;
490
491    Ok(())
492}
493
494// ── Request Implementation ─────────────────────────────────────────────
495
496/// Executes an LLM chat request. Called from capability-gated context.
497///
498/// # Arguments (from Lua)
499///
500/// * `prompt` - User message text
501/// * `opts` - Optional table:
502///   - `provider` - "ollama" (default), "openai", "anthropic"
503///   - `base_url` - Provider base URL (default per provider)
504///   - `model` - Model name (default per provider)
505///   - `api_key` - API key (falls back to env var)
506///   - `system_prompt` - System prompt text
507///   - `session_id` - Session ID for multi-turn (nil = new session)
508///   - `temperature` - Sampling temperature
509///   - `max_tokens` - Max completion tokens
510///   - `timeout` - Per-request timeout in seconds (default: 120)
511///   - `overall_timeout` - Wall-clock timeout in seconds for the entire
512///     resolve loop (default: nil = no limit). When set, the loop aborts
513///     with `error_kind = "overall_timeout"` if the deadline is exceeded.
514///
515/// # Returns (Lua table)
516///
517/// * `ok` - boolean
518/// * `content` - Response text (when ok=true)
519/// * `model` - Model name from response
520/// * `session_id` - Session ID (new or existing)
521/// * `error` - Error message (when ok=false)
522/// * `error_kind` - Error classification
523pub fn llm_request_impl(lua: &Lua, args: (String, Option<Table>)) -> mlua::Result<Table> {
524    let (prompt, opts) = args;
525
526    // Parse options
527    let llm_opts = match LlmOpts::from_lua(opts.as_ref()) {
528        Ok(o) => o,
529        Err(e) => {
530            let result = lua.create_table()?;
531            result.set("ok", false)?;
532            result.set("error", e)?;
533            result.set("error_kind", "invalid_options")?;
534            return Ok(result);
535        }
536    };
537
538    // Validate API key requirement for providers that need one
539    if llm_opts.provider.requires_api_key() && llm_opts.api_key.is_none() {
540        let env_name = llm_opts
541            .provider
542            .api_key_env()
543            .unwrap_or("(unknown env var)");
544        let result = lua.create_table()?;
545        result.set("ok", false)?;
546        result.set(
547            "error",
548            format!(
549                "API key required for {:?}: set opts.api_key or {} environment variable",
550                llm_opts.provider, env_name
551            ),
552        )?;
553        result.set("error_kind", "missing_api_key")?;
554        return Ok(result);
555    }
556
557    // Session management: check for HIL resume session first, then get or create.
558    let hil_resume = if llm_opts.hil_intents {
559        take_hil_session_id(lua)
560    } else {
561        None
562    };
563    let session_id = if let Some(ref saved) = hil_resume {
564        saved.clone()
565    } else {
566        resolve_session_id(lua, &llm_opts.session_id)
567    };
568
569    // Build tools JSON from IntentRegistry (when opts.tools is true)
570    tracing::info!(
571        "llm tools_enabled={}, resolve={}, provider={:?}",
572        llm_opts.tools,
573        llm_opts.resolve,
574        llm_opts.provider
575    );
576    let tools_json = if llm_opts.tools {
577        build_tools_for_provider(lua, llm_opts.provider)
578    } else {
579        None
580    };
581
582    // Build URL
583    let url = format!(
584        "{}{}",
585        llm_opts.base_url.trim_end_matches('/'),
586        llm_opts.provider.chat_path()
587    );
588
589    // Get shared client (reused across retries and tool turns)
590    let client = get_or_init_http_client(lua)?;
591
592    // ── First turn: build messages from history + prompt ──
593    //
594    // HIL resume: the session already contains full conversation history
595    // (including a synthetic tool_result for the approved operation).
596    // Use a short continuation prompt instead of repeating the original task.
597    let effective_prompt: std::borrow::Cow<'_, str> = if hil_resume.is_some() {
598        "The previously suspended operation has been approved by the user. \
599         Please execute it now."
600            .into()
601    } else {
602        std::borrow::Cow::Borrowed(&prompt)
603    };
604    let mut messages = build_messages(lua, &session_id, &effective_prompt, &llm_opts);
605
606    // Store initial prompt in session so HIL resume can reconstruct the
607    // full message sequence (API requires first message to be user role).
608    if hil_resume.is_none() && session_message_count(lua, &session_id) == 0 {
609        append_message(
610            lua,
611            &session_id,
612            orcs_types::intent::Role::User,
613            MessageContent::Text(prompt.to_string()),
614        );
615    }
616
617    // ── HIL turn offset ──
618    // When hil_intents=true and a previous call was Suspended, the accumulator
619    // holds the number of turns already consumed.  Read-and-reset so only a
620    // subsequent Suspended writes a new value.
621    let turn_offset = if llm_opts.hil_intents {
622        take_hil_turn_accumulator(lua)
623    } else {
624        0
625    };
626    let remaining_budget = llm_opts.max_tool_turns.saturating_sub(turn_offset);
627
628    // ── Wall-clock deadline ──
629    // When overall_timeout is set, the entire resolve loop is bounded by a
630    // wall-clock deadline. Checked at each iteration before sending an API call.
631    let deadline = llm_opts
632        .overall_timeout
633        .map(|secs| Instant::now() + Duration::from_secs(secs));
634
635    // ── Tool loop ──
636    // Each iteration: build body → send → parse → if tool_use && resolve → dispatch → append results → repeat
637    tracing::info!(
638        max_tool_turns = llm_opts.max_tool_turns,
639        turn_offset = turn_offset,
640        remaining_budget = remaining_budget,
641        resolve = llm_opts.resolve,
642        reminder_threshold = TURN_REMINDER_THRESHOLD,
643        overall_timeout = ?llm_opts.overall_timeout,
644        "resolve loop config"
645    );
646    for tool_turn in 0..=remaining_budget {
647        let global_turn = turn_offset + tool_turn;
648
649        // Check wall-clock deadline before each API call
650        if let Some(dl) = deadline {
651            if Instant::now() >= dl {
652                tracing::warn!(
653                    overall_timeout = llm_opts.overall_timeout,
654                    tool_turn = global_turn,
655                    session_id = %session_id,
656                    "overall timeout exceeded"
657                );
658                let result = lua.create_table()?;
659                result.set("ok", false)?;
660                result.set(
661                    "error",
662                    format!(
663                        "overall timeout exceeded ({}s) at tool turn {}",
664                        llm_opts.overall_timeout.unwrap_or(0),
665                        global_turn
666                    ),
667                )?;
668                result.set("error_kind", "overall_timeout")?;
669                result.set("session_id", session_id.clone())?;
670                return Ok(result);
671            }
672        }
673        let request_body = match build_request_body(&llm_opts, &messages, tools_json.as_ref()) {
674            Ok(body) => body,
675            Err(e) => {
676                let result = lua.create_table()?;
677                result.set("ok", false)?;
678                result.set("error", e)?;
679                result.set("error_kind", "request_build_error")?;
680                return Ok(result);
681            }
682        };
683
684        let body_str = request_body.to_string();
685        let tool_count = tools_json
686            .as_ref()
687            .and_then(|t| t.as_array())
688            .map_or(0, |a| a.len());
689        tracing::info!(
690            "llm request turn={}: {} {} ({}B, tools={})",
691            global_turn,
692            llm_opts.provider.chat_path(),
693            llm_opts.model,
694            body_str.len(),
695            tool_count
696        );
697        tracing::debug!(turn = global_turn, "llm request body:\n{}", body_str);
698
699        // Send with retry
700        let resp = match send_with_retry(&client, &url, &llm_opts, &body_str) {
701            Ok(resp) => resp,
702            Err(SendError::Transport(e)) => return build_error_result(lua, e, &session_id),
703            Err(SendError::Classified { kind, message }) => {
704                return build_classified_error_result(lua, kind, &message, &session_id);
705            }
706        };
707
708        // Parse response
709        let parsed_resp = match parse_response_body(lua, resp, &llm_opts, &session_id)? {
710            ResponseOrError::Parsed(p) => p,
711            ResponseOrError::ErrorTable(t) => return Ok(t),
712        };
713
714        let is_tool_use = parsed_resp.stop_reason == StopReason::ToolUse;
715        let should_resolve = is_tool_use && llm_opts.resolve && !parsed_resp.intents.is_empty();
716        tracing::info!(
717            "llm response turn={}: stop_reason={:?}, intents={}, resolve={}, content_len={}",
718            global_turn,
719            parsed_resp.stop_reason,
720            parsed_resp.intents.len(),
721            should_resolve,
722            parsed_resp.content.len()
723        );
724
725        if should_resolve && global_turn < llm_opts.max_tool_turns {
726            // ── Auto-resolve: dispatch intents and continue loop ──
727
728            // Build assistant message with ContentBlocks (preserves tool_use blocks)
729            let assistant_blocks = build_assistant_content_blocks(&parsed_resp);
730            messages.push(session::Message {
731                role: orcs_types::intent::Role::Assistant,
732                content: assistant_blocks.clone(),
733            });
734            append_message(
735                lua,
736                &session_id,
737                orcs_types::intent::Role::Assistant,
738                assistant_blocks,
739            );
740
741            // Dispatch each intent and collect tool results.
742            //
743            // hil_intents=true: Suspended propagates → session preserved with
744            //   synthetic tool_results → ChannelRunner triggers HIL → user
745            //   approves → on_request replays with full conversation context.
746            //
747            // hil_intents=false: Suspended caught → error tool_results synthesized
748            //   so the session stays consistent (every tool_use has a matching
749            //   tool_result), preventing API 400 on session resume.
750            let mut tool_result_content = match dispatch_intents_to_results(
751                lua,
752                &parsed_resp.intents,
753                llm_opts.hil_intents,
754            ) {
755                Ok(content) => content,
756                Err(e) => {
757                    if llm_opts.hil_intents && crate::extract_suspended_info(&e).is_some() {
758                        // HIL resume mode: keep session history intact so the
759                        // re-dispatched call can continue with full context.
760                        // Add synthetic tool_results so every tool_use has a
761                        // matching result (API requirement).
762                        set_hil_turn_accumulator(lua, global_turn + 1);
763                        let resume_blocks: Vec<ContentBlock> = parsed_resp
764                            .intents
765                            .iter()
766                            .map(|intent| ContentBlock::ToolResult {
767                                tool_use_id: intent.id.clone(),
768                                content: "Operation suspended pending user approval. \
769                                          Once approved, please retry this operation."
770                                    .to_string(),
771                                is_error: Some(false),
772                            })
773                            .collect();
774                        let resume_content = MessageContent::Blocks(resume_blocks);
775                        append_message(
776                            lua,
777                            &session_id,
778                            orcs_types::intent::Role::User,
779                            resume_content,
780                        );
781                        set_hil_session_id(lua, session_id.clone());
782                        return Err(e);
783                    }
784                    // Non-HIL: synthesize error tool_results for session consistency.
785                    let error_blocks: Vec<ContentBlock> = parsed_resp
786                        .intents
787                        .iter()
788                        .map(|intent| ContentBlock::ToolResult {
789                            tool_use_id: intent.id.clone(),
790                            content: format!("dispatch error: {e}"),
791                            is_error: Some(true),
792                        })
793                        .collect();
794                    let fallback = MessageContent::Blocks(error_blocks);
795                    messages.push(session::Message {
796                        role: orcs_types::intent::Role::User,
797                        content: fallback.clone(),
798                    });
799                    append_message(lua, &session_id, orcs_types::intent::Role::User, fallback);
800                    return Err(e);
801                }
802            };
803
804            // ── Turn budget reminder ──
805            // Inject a reminder when approaching the turn limit so the LLM
806            // can prioritize remaining work (research shows 27-60% perf drop
807            // without reminders).
808            let remaining = llm_opts.max_tool_turns.saturating_sub(global_turn + 1);
809            if remaining > 0 && remaining <= TURN_REMINDER_THRESHOLD {
810                match tool_result_content {
811                    MessageContent::Blocks(ref mut blocks) => {
812                        blocks.push(ContentBlock::Text {
813                            text: format!(
814                                "[System] You have {} tool turn(s) remaining before the limit. \
815                                 Prioritize completing the most important remaining work.",
816                                remaining
817                            ),
818                        });
819                        tracing::warn!(
820                            tool_turn = global_turn,
821                            remaining = remaining,
822                            max_tool_turns = llm_opts.max_tool_turns,
823                            "turn budget reminder injected"
824                        );
825                    }
826                    _ => {
827                        tracing::warn!(
828                            tool_turn = global_turn,
829                            remaining = remaining,
830                            "turn budget reminder skipped: tool_result_content is not Blocks"
831                        );
832                    }
833                }
834            }
835
836            messages.push(session::Message {
837                role: orcs_types::intent::Role::User,
838                content: tool_result_content.clone(),
839            });
840            append_message(
841                lua,
842                &session_id,
843                orcs_types::intent::Role::User,
844                tool_result_content,
845            );
846
847            let intent_names: Vec<&str> = parsed_resp
848                .intents
849                .iter()
850                .map(|i| i.name.as_str())
851                .collect();
852            tracing::info!(
853                "tool turn {}: resolved {} intent(s) [{}], continuing",
854                global_turn,
855                parsed_resp.intents.len(),
856                intent_names.join(", ")
857            );
858            continue;
859        }
860
861        // ── MaxTokens continuation: recover from truncated output ──
862        //
863        // When the LLM hits max_tokens during a resolve loop, the output is
864        // truncated.  Two sub-cases:
865        //   (a) Complete intents exist → resolve them, then continue so the
866        //       model can finish its remaining work.
867        //   (b) No intents (text-only truncation) → store partial text and
868        //       inject a continuation prompt so the model picks up where it
869        //       left off.
870        let should_continue_on_max_tokens = parsed_resp.stop_reason == StopReason::MaxTokens
871            && llm_opts.resolve
872            && global_turn < llm_opts.max_tool_turns;
873
874        if should_continue_on_max_tokens {
875            tracing::warn!(
876                "llm response truncated by max_tokens at turn={} (intents={}, content_len={}), attempting continuation",
877                global_turn,
878                parsed_resp.intents.len(),
879                parsed_resp.content.len()
880            );
881
882            if !parsed_resp.intents.is_empty() {
883                // Case (a): complete intents survived truncation — resolve them.
884                let assistant_blocks = build_assistant_content_blocks(&parsed_resp);
885                messages.push(session::Message {
886                    role: orcs_types::intent::Role::Assistant,
887                    content: assistant_blocks.clone(),
888                });
889                append_message(
890                    lua,
891                    &session_id,
892                    orcs_types::intent::Role::Assistant,
893                    assistant_blocks,
894                );
895
896                let tool_result_content = match dispatch_intents_to_results(
897                    lua,
898                    &parsed_resp.intents,
899                    llm_opts.hil_intents,
900                ) {
901                    Ok(content) => content,
902                    Err(e) => {
903                        if llm_opts.hil_intents && crate::extract_suspended_info(&e).is_some() {
904                            set_hil_turn_accumulator(lua, global_turn + 1);
905                            let resume_blocks: Vec<ContentBlock> = parsed_resp
906                                .intents
907                                .iter()
908                                .map(|intent| ContentBlock::ToolResult {
909                                    tool_use_id: intent.id.clone(),
910                                    content: "Operation suspended pending user approval. \
911                                              Once approved, please retry this operation."
912                                        .to_string(),
913                                    is_error: Some(false),
914                                })
915                                .collect();
916                            let resume_content = MessageContent::Blocks(resume_blocks);
917                            append_message(
918                                lua,
919                                &session_id,
920                                orcs_types::intent::Role::User,
921                                resume_content,
922                            );
923                            set_hil_session_id(lua, session_id.clone());
924                            return Err(e);
925                        }
926                        let error_blocks: Vec<ContentBlock> = parsed_resp
927                            .intents
928                            .iter()
929                            .map(|intent| ContentBlock::ToolResult {
930                                tool_use_id: intent.id.clone(),
931                                content: format!("dispatch error: {e}"),
932                                is_error: Some(true),
933                            })
934                            .collect();
935                        let fallback = MessageContent::Blocks(error_blocks);
936                        messages.push(session::Message {
937                            role: orcs_types::intent::Role::User,
938                            content: fallback.clone(),
939                        });
940                        append_message(lua, &session_id, orcs_types::intent::Role::User, fallback);
941                        return Err(e);
942                    }
943                };
944                messages.push(session::Message {
945                    role: orcs_types::intent::Role::User,
946                    content: tool_result_content.clone(),
947                });
948                append_message(
949                    lua,
950                    &session_id,
951                    orcs_types::intent::Role::User,
952                    tool_result_content,
953                );
954            } else {
955                // Case (b): text-only truncation — store partial content and
956                // inject a continuation prompt.
957                let assistant_content = MessageContent::Text(parsed_resp.content.clone());
958                messages.push(session::Message {
959                    role: orcs_types::intent::Role::Assistant,
960                    content: assistant_content.clone(),
961                });
962                append_message(
963                    lua,
964                    &session_id,
965                    orcs_types::intent::Role::Assistant,
966                    assistant_content,
967                );
968
969                let continuation = MessageContent::Text(
970                    "Your previous response was truncated due to output token limits. \
971                     Please continue from where you left off."
972                        .to_string(),
973                );
974                messages.push(session::Message {
975                    role: orcs_types::intent::Role::User,
976                    content: continuation.clone(),
977                });
978                append_message(
979                    lua,
980                    &session_id,
981                    orcs_types::intent::Role::User,
982                    continuation,
983                );
984            }
985            continue;
986        }
987
988        // ── Final response: return to Lua ──
989        // For non-resolve mode or final turn: store text-only in session
990        update_session(lua, &session_id, &prompt, &parsed_resp.content);
991
992        return build_lua_result(lua, &parsed_resp, &llm_opts, &session_id);
993    }
994
995    // Tool loop exhausted
996    tracing::warn!(
997        max_tool_turns = llm_opts.max_tool_turns,
998        turn_offset = turn_offset,
999        session_id = %session_id,
1000        "tool loop exhausted: reached max_tool_turns limit"
1001    );
1002    let result = lua.create_table()?;
1003    result.set("ok", false)?;
1004    result.set(
1005        "error",
1006        format!(
1007            "tool loop exceeded max_tool_turns ({})",
1008            llm_opts.max_tool_turns
1009        ),
1010    )?;
1011    result.set("error_kind", "tool_loop_limit")?;
1012    result.set("session_id", session_id)?;
1013    Ok(result)
1014}
1015
1016// ── Tests ──────────────────────────────────────────────────────────────
1017
1018#[cfg(test)]
1019mod tests {
1020    use super::*;
1021
1022    // ── LlmOpts tests ─────────────────────────────────────────────────
1023
1024    #[test]
1025    fn llm_opts_defaults_to_ollama() {
1026        let opts = LlmOpts::from_lua(None).expect("should parse None opts");
1027        assert_eq!(opts.provider, Provider::Ollama);
1028        assert_eq!(opts.base_url, "http://localhost:11434");
1029        assert_eq!(opts.model, "llama3.2");
1030        assert_eq!(opts.timeout, 120);
1031        assert!(opts.api_key.is_none());
1032    }
1033
1034    #[test]
1035    fn llm_opts_parses_provider() {
1036        let lua = Lua::new();
1037        let tbl = lua.create_table().expect("create table");
1038        tbl.set("provider", "anthropic").expect("set provider");
1039        tbl.set("api_key", "test-key").expect("set api_key");
1040
1041        let opts = LlmOpts::from_lua(Some(&tbl)).expect("should parse opts");
1042        assert_eq!(opts.provider, Provider::Anthropic);
1043        assert_eq!(opts.base_url, "https://api.anthropic.com");
1044        assert_eq!(opts.model, "claude-sonnet-4-20250514");
1045        assert_eq!(opts.api_key.as_deref(), Some("test-key"));
1046    }
1047
1048    #[test]
1049    fn llm_opts_custom_overrides() {
1050        let lua = Lua::new();
1051        let tbl = lua.create_table().expect("create table");
1052        tbl.set("provider", "openai").expect("set provider");
1053        tbl.set("base_url", "https://custom.api.com")
1054            .expect("set base_url");
1055        tbl.set("model", "gpt-4o-mini").expect("set model");
1056        tbl.set("temperature", 0.5).expect("set temperature");
1057        tbl.set("max_tokens", 2048u64).expect("set max_tokens");
1058        tbl.set("timeout", 60u64).expect("set timeout");
1059        tbl.set("api_key", "sk-test").expect("set api_key");
1060
1061        let opts = LlmOpts::from_lua(Some(&tbl)).expect("should parse opts");
1062        assert_eq!(opts.provider, Provider::OpenAI);
1063        assert_eq!(opts.base_url, "https://custom.api.com");
1064        assert_eq!(opts.model, "gpt-4o-mini");
1065        assert_eq!(opts.temperature, Some(0.5));
1066        assert_eq!(opts.max_tokens, Some(2048));
1067        assert_eq!(opts.timeout, 60);
1068        assert_eq!(opts.api_key.as_deref(), Some("sk-test"));
1069    }
1070
1071    #[test]
1072    fn llm_opts_invalid_provider() {
1073        let lua = Lua::new();
1074        let tbl = lua.create_table().expect("create table");
1075        tbl.set("provider", "gpt").expect("set provider");
1076
1077        let err = LlmOpts::from_lua(Some(&tbl)).expect_err("should reject invalid provider");
1078        assert!(
1079            err.contains("unsupported provider"),
1080            "error should mention unsupported, got: {}",
1081            err
1082        );
1083    }
1084
1085    #[test]
1086    fn llm_opts_default_max_retries() {
1087        let opts = LlmOpts::from_lua(None).expect("should parse None opts");
1088        assert_eq!(opts.max_retries, DEFAULT_MAX_RETRIES);
1089    }
1090
1091    #[test]
1092    fn llm_opts_custom_max_retries() {
1093        let lua = Lua::new();
1094        let tbl = lua.create_table().expect("create table");
1095        tbl.set("max_retries", 5u32).expect("set max_retries");
1096
1097        let opts = LlmOpts::from_lua(Some(&tbl)).expect("should parse opts");
1098        assert_eq!(opts.max_retries, 5);
1099    }
1100
1101    #[test]
1102    fn llm_opts_zero_max_retries() {
1103        let lua = Lua::new();
1104        let tbl = lua.create_table().expect("create table");
1105        tbl.set("max_retries", 0u32).expect("set max_retries");
1106
1107        let opts = LlmOpts::from_lua(Some(&tbl)).expect("should parse opts");
1108        assert_eq!(opts.max_retries, 0);
1109    }
1110
1111    // ── Boolean field nil-coercion tests ─────────────────────────────
1112    //
1113    // mlua coerces Lua nil → false for bool (Lua falsiness). Missing keys
1114    // in a Lua table return nil, so get::<bool>("missing") → Ok(false).
1115    // This makes unwrap_or(default) useless when default != false.
1116    // We use Option<bool> + flatten to distinguish nil from explicit false.
1117
1118    #[test]
1119    fn llm_opts_tools_defaults_true_when_key_absent() {
1120        let lua = Lua::new();
1121        let tbl = lua.create_table().expect("create table");
1122        // No "tools" key set — should default to true
1123        let opts = LlmOpts::from_lua(Some(&tbl)).expect("parse opts");
1124        assert!(
1125            opts.tools,
1126            "tools should default to true when key is absent"
1127        );
1128    }
1129
1130    #[test]
1131    fn llm_opts_tools_defaults_true_when_opts_none() {
1132        let opts = LlmOpts::from_lua(None).expect("parse None opts");
1133        assert!(opts.tools, "tools should default to true when opts is None");
1134    }
1135
1136    #[test]
1137    fn llm_opts_tools_explicit_false() {
1138        let lua = Lua::new();
1139        let tbl = lua.create_table().expect("create table");
1140        tbl.set("tools", false).expect("set tools");
1141        let opts = LlmOpts::from_lua(Some(&tbl)).expect("parse opts");
1142        assert!(!opts.tools, "tools should be false when explicitly set");
1143    }
1144
1145    #[test]
1146    fn llm_opts_tools_explicit_true() {
1147        let lua = Lua::new();
1148        let tbl = lua.create_table().expect("create table");
1149        tbl.set("tools", true).expect("set tools");
1150        let opts = LlmOpts::from_lua(Some(&tbl)).expect("parse opts");
1151        assert!(opts.tools, "tools should be true when explicitly set");
1152    }
1153
1154    #[test]
1155    fn llm_opts_resolve_defaults_false_when_key_absent() {
1156        let lua = Lua::new();
1157        let tbl = lua.create_table().expect("create table");
1158        let opts = LlmOpts::from_lua(Some(&tbl)).expect("parse opts");
1159        assert!(
1160            !opts.resolve,
1161            "resolve should default to false when key is absent"
1162        );
1163        assert_eq!(
1164            opts.max_tool_turns, DEFAULT_MAX_TOOL_TURNS,
1165            "max_tool_turns should default"
1166        );
1167    }
1168
1169    #[test]
1170    fn llm_opts_resolve_explicit_true() {
1171        let lua = Lua::new();
1172        let tbl = lua.create_table().expect("create table");
1173        tbl.set("resolve", true).expect("set resolve");
1174        tbl.set("max_tool_turns", 3u32).expect("set max_tool_turns");
1175        let opts = LlmOpts::from_lua(Some(&tbl)).expect("parse opts");
1176        assert!(opts.resolve, "resolve should be true when explicitly set");
1177        assert_eq!(opts.max_tool_turns, 3, "max_tool_turns should be 3");
1178    }
1179
1180    #[test]
1181    fn llm_opts_resolve_explicit_false() {
1182        let lua = Lua::new();
1183        let tbl = lua.create_table().expect("create table");
1184        tbl.set("resolve", false).expect("set resolve");
1185        let opts = LlmOpts::from_lua(Some(&tbl)).expect("parse opts");
1186        assert!(!opts.resolve, "resolve should be false when explicitly set");
1187    }
1188
1189    // ── overall_timeout tests ─────────────────────────────────────────
1190
1191    #[test]
1192    fn llm_opts_overall_timeout_none_by_default() {
1193        let opts = LlmOpts::from_lua(None).expect("should parse None opts");
1194        assert_eq!(opts.overall_timeout, None);
1195    }
1196
1197    #[test]
1198    fn llm_opts_overall_timeout_none_when_key_absent() {
1199        let lua = Lua::new();
1200        let tbl = lua.create_table().expect("create table");
1201        let opts = LlmOpts::from_lua(Some(&tbl)).expect("parse opts");
1202        assert_eq!(
1203            opts.overall_timeout, None,
1204            "overall_timeout should be None when key is absent"
1205        );
1206    }
1207
1208    #[test]
1209    fn llm_opts_overall_timeout_parsed() {
1210        let lua = Lua::new();
1211        let tbl = lua.create_table().expect("create table");
1212        tbl.set("overall_timeout", 30u64)
1213            .expect("set overall_timeout");
1214        let opts = LlmOpts::from_lua(Some(&tbl)).expect("parse opts");
1215        assert_eq!(
1216            opts.overall_timeout,
1217            Some(30),
1218            "overall_timeout should be 30 when explicitly set"
1219        );
1220    }
1221
1222    // ── Deny stub test ─────────────────────────────────────────────────
1223
1224    #[test]
1225    fn deny_stub_returns_permission_denied() {
1226        let lua = Lua::new();
1227        let orcs = crate::orcs_helpers::ensure_orcs_table(&lua).expect("create orcs table");
1228        register_llm_deny_stub(&lua, &orcs).expect("register stub");
1229
1230        let result: Table = lua
1231            .load(r#"return orcs.llm("hello")"#)
1232            .eval()
1233            .expect("should return deny table");
1234
1235        assert!(!result.get::<bool>("ok").expect("get ok"));
1236        let error: String = result.get("error").expect("get error");
1237        assert!(
1238            error.contains("llm denied"),
1239            "expected permission denied, got: {error}"
1240        );
1241        assert_eq!(
1242            result.get::<String>("error_kind").expect("get error_kind"),
1243            "permission_denied"
1244        );
1245    }
1246
1247    // ── Session persistence tests ─────────────────────────────────────
1248
1249    #[test]
1250    fn session_dump_empty() {
1251        let lua = Lua::new();
1252        let orcs = crate::orcs_helpers::ensure_orcs_table(&lua).expect("create orcs table");
1253        register_llm_deny_stub(&lua, &orcs).expect("register stub");
1254
1255        let json: String = lua
1256            .load(r#"return orcs.llm_dump_sessions()"#)
1257            .eval()
1258            .expect("should return json string");
1259        assert_eq!(json, "{}");
1260    }
1261
1262    #[test]
1263    fn session_dump_with_history() {
1264        let lua = Lua::new();
1265        let orcs = crate::orcs_helpers::ensure_orcs_table(&lua).expect("create orcs table");
1266        register_llm_deny_stub(&lua, &orcs).expect("register stub");
1267
1268        // Create a session with some history
1269        let sid = resolve_session_id(&lua, &None);
1270        update_session(&lua, &sid, "hello", "world");
1271
1272        let json: String = lua
1273            .load(r#"return orcs.llm_dump_sessions()"#)
1274            .eval()
1275            .expect("should return json string");
1276
1277        let parsed: serde_json::Value = serde_json::from_str(&json).expect("should be valid JSON");
1278        assert!(parsed.is_object(), "should be JSON object");
1279        let sessions = parsed.as_object().expect("should be object");
1280        assert_eq!(sessions.len(), 1, "should have one session");
1281
1282        let history = sessions.get(&sid).expect("should have session by id");
1283        let msgs = history.as_array().expect("should be array");
1284        assert_eq!(msgs.len(), 2, "should have 2 messages");
1285        assert_eq!(msgs[0]["role"], "user");
1286        assert_eq!(msgs[0]["content"], "hello");
1287        assert_eq!(msgs[1]["role"], "assistant");
1288        assert_eq!(msgs[1]["content"], "world");
1289    }
1290
    /// Full dump → wipe → load cycle: two sessions survive serialization
    /// and are restored into a fresh `SessionStore` with content intact.
    ///
    /// The step order matters: the store must be removed from Lua app_data
    /// *after* dumping and *before* loading, otherwise the load would merge
    /// into (or be masked by) the original store.
    #[test]
    fn session_load_roundtrip() {
        let lua = Lua::new();
        let orcs = crate::orcs_helpers::ensure_orcs_table(&lua).expect("create orcs table");
        register_llm_deny_stub(&lua, &orcs).expect("register stub");

        // Create two independent sessions, one exchange each.
        let sid1 = resolve_session_id(&lua, &None);
        update_session(&lua, &sid1, "q1", "a1");
        let sid2 = resolve_session_id(&lua, &None);
        update_session(&lua, &sid2, "q2", "a2");

        // Dump the store to its JSON wire form.
        let json: String = lua
            .load(r#"return orcs.llm_dump_sessions()"#)
            .eval()
            .expect("dump should succeed");

        // Clear the store so the subsequent load starts from nothing.
        let _ = lua.remove_app_data::<SessionStore>();

        // Load back via the Lua-exposed function (exercises the real API path).
        lua.globals()
            .get::<Table>("orcs")
            .expect("get orcs table")
            .get::<mlua::Function>("llm_load_sessions")
            .expect("get load fn")
            .call::<Table>(json.clone())
            .expect("load should succeed");

        // Verify the restored store matches what was dumped.
        let store = lua
            .app_data_ref::<SessionStore>()
            .expect("store should exist");
        assert_eq!(store.0.len(), 2, "should have 2 sessions");
        let h1 = store.0.get(&sid1).expect("session 1 should exist");
        assert_eq!(h1.len(), 2);
        assert_eq!(h1[0].content.text(), Some("q1"));
        assert_eq!(h1[1].content.text(), Some("a1"));
    }
1331
1332    #[test]
1333    fn session_load_invalid_json() {
1334        let lua = Lua::new();
1335        let orcs = crate::orcs_helpers::ensure_orcs_table(&lua).expect("create orcs table");
1336        register_llm_deny_stub(&lua, &orcs).expect("register stub");
1337
1338        let result = lua
1339            .load(r#"return orcs.llm_load_sessions("not valid json")"#)
1340            .eval::<Table>();
1341
1342        assert!(
1343            result.is_err(),
1344            "should error on invalid JSON, got: {:?}",
1345            result
1346        );
1347    }
1348
1349    #[test]
1350    fn session_load_returns_count() {
1351        let lua = Lua::new();
1352        let orcs = crate::orcs_helpers::ensure_orcs_table(&lua).expect("create orcs table");
1353        register_llm_deny_stub(&lua, &orcs).expect("register stub");
1354
1355        let json = r#"{"sess-1": [{"role":"user","content":"hi"}], "sess-2": []}"#;
1356        let result: Table = lua
1357            .load(format!(
1358                r#"return orcs.llm_load_sessions('{}')"#,
1359                json.replace('\'', "\\'")
1360            ))
1361            .eval()
1362            .expect("load should succeed");
1363
1364        assert!(result.get::<bool>("ok").expect("get ok"));
1365        assert_eq!(
1366            result.get::<i64>("count").expect("get count"),
1367            2,
1368            "should report 2 sessions loaded"
1369        );
1370    }
1371
1372    // ── Ping tests ──────────────────────────────────────────────────────
1373
1374    #[tokio::test(flavor = "multi_thread")]
1375    async fn ping_defaults_to_ollama() {
1376        let lua = Lua::new();
1377        let result = llm_ping_impl(&lua, None).expect("should not panic");
1378
1379        let provider: String = result.get("provider").expect("get provider");
1380        assert_eq!(provider, "ollama");
1381
1382        let base_url: String = result.get("base_url").expect("get base_url");
1383        assert_eq!(base_url, "http://localhost:11434");
1384
1385        // latency_ms is always present
1386        let _: u64 = result.get("latency_ms").expect("get latency_ms");
1387    }
1388
1389    #[test]
1390    fn ping_invalid_provider() {
1391        let lua = Lua::new();
1392        let opts = lua.create_table().expect("create opts");
1393        opts.set("provider", "gemini").expect("set provider");
1394
1395        let result = llm_ping_impl(&lua, Some(opts)).expect("should not panic");
1396        assert!(!result.get::<bool>("ok").expect("get ok"));
1397        assert_eq!(
1398            result.get::<String>("error_kind").expect("get error_kind"),
1399            "invalid_options"
1400        );
1401    }
1402
1403    #[tokio::test(flavor = "multi_thread")]
1404    async fn ping_connection_refused() {
1405        let lua = Lua::new();
1406        let opts = lua.create_table().expect("create opts");
1407        opts.set("provider", "ollama").expect("set provider");
1408        opts.set("base_url", "http://127.0.0.1:1")
1409            .expect("set base_url");
1410        opts.set("timeout", 2u64).expect("set timeout");
1411
1412        let result = llm_ping_impl(&lua, Some(opts)).expect("should not panic");
1413        assert!(
1414            !result.get::<bool>("ok").expect("get ok"),
1415            "should fail when nothing is listening"
1416        );
1417
1418        let error_kind: String = result.get("error_kind").expect("get error_kind");
1419        assert!(
1420            error_kind == "connection_refused"
1421                || error_kind == "network"
1422                || error_kind == "timeout",
1423            "expected connection error, got: {}",
1424            error_kind
1425        );
1426    }
1427
1428    #[test]
1429    fn ping_deny_stub_returns_permission_denied() {
1430        let lua = Lua::new();
1431        let orcs = crate::orcs_helpers::ensure_orcs_table(&lua).expect("create orcs table");
1432        register_llm_deny_stub(&lua, &orcs).expect("register stub");
1433
1434        let result: Table = lua
1435            .load(r#"return orcs.llm_ping()"#)
1436            .eval()
1437            .expect("should return deny table");
1438
1439        assert!(!result.get::<bool>("ok").expect("get ok"));
1440        let error: String = result.get("error").expect("get error");
1441        assert!(
1442            error.contains("llm_ping denied"),
1443            "expected permission denied, got: {error}"
1444        );
1445        assert_eq!(
1446            result.get::<String>("error_kind").expect("get error_kind"),
1447            "permission_denied"
1448        );
1449    }
1450
1451    // ── Integration: llm_request_impl with missing API key ─────────────
1452
1453    #[test]
1454    fn openai_missing_api_key_returns_error() {
1455        let lua = Lua::new();
1456        let opts = lua.create_table().expect("create opts");
1457        opts.set("provider", "openai").expect("set provider");
1458
1459        let prev = std::env::var("OPENAI_API_KEY").ok();
1460        std::env::remove_var("OPENAI_API_KEY");
1461
1462        let result =
1463            llm_request_impl(&lua, ("hello".into(), Some(opts))).expect("should not panic");
1464
1465        if let Some(val) = prev {
1466            std::env::set_var("OPENAI_API_KEY", val);
1467        }
1468
1469        assert!(!result.get::<bool>("ok").expect("get ok"));
1470        assert_eq!(
1471            result.get::<String>("error_kind").expect("get error_kind"),
1472            "missing_api_key"
1473        );
1474    }
1475
1476    #[test]
1477    fn anthropic_missing_api_key_returns_error() {
1478        let lua = Lua::new();
1479        let opts = lua.create_table().expect("create opts");
1480        opts.set("provider", "anthropic").expect("set provider");
1481
1482        let prev = std::env::var("ANTHROPIC_API_KEY").ok();
1483        std::env::remove_var("ANTHROPIC_API_KEY");
1484
1485        let result =
1486            llm_request_impl(&lua, ("hello".into(), Some(opts))).expect("should not panic");
1487
1488        if let Some(val) = prev {
1489            std::env::set_var("ANTHROPIC_API_KEY", val);
1490        }
1491
1492        assert!(!result.get::<bool>("ok").expect("get ok"));
1493        assert_eq!(
1494            result.get::<String>("error_kind").expect("get error_kind"),
1495            "missing_api_key"
1496        );
1497    }
1498
1499    #[test]
1500    fn ollama_no_api_key_required() {
1501        let lua = Lua::new();
1502        let opts = lua.create_table().expect("create opts");
1503        opts.set("provider", "ollama").expect("set provider");
1504        opts.set("timeout", 1u64).expect("set timeout");
1505
1506        // This will fail to connect (Ollama likely not running) but should not
1507        // fail due to missing API key
1508        let result =
1509            llm_request_impl(&lua, ("hello".into(), Some(opts))).expect("should not panic");
1510
1511        // If Ollama is running, ok=true; if not, ok=false but NOT missing_api_key
1512        if !result.get::<bool>("ok").expect("get ok") {
1513            let error_kind: String = result.get("error_kind").expect("get error_kind");
1514            assert_ne!(
1515                error_kind, "missing_api_key",
1516                "ollama should not require API key"
1517            );
1518        }
1519    }
1520
1521    // ── Integration: connection error ──────────────────────────────────
1522
1523    #[tokio::test(flavor = "multi_thread")]
1524    async fn connection_refused_returns_network_error() {
1525        let lua = Lua::new();
1526        let opts = lua.create_table().expect("create opts");
1527        opts.set("provider", "ollama").expect("set provider");
1528        opts.set("base_url", "http://127.0.0.1:1")
1529            .expect("set base_url");
1530        opts.set("timeout", 2u64).expect("set timeout");
1531
1532        let result =
1533            llm_request_impl(&lua, ("hello".into(), Some(opts))).expect("should not panic");
1534        assert!(!result.get::<bool>("ok").expect("get ok"));
1535
1536        let error_kind: String = result.get("error_kind").expect("get error_kind");
1537        assert!(
1538            error_kind == "connection_refused"
1539                || error_kind == "network"
1540                || error_kind == "timeout",
1541            "expected connection error, got: {}",
1542            error_kind
1543        );
1544
1545        // Should still have session_id
1546        let session_id: String = result.get("session_id").expect("get session_id");
1547        assert!(
1548            session_id.starts_with("sess-"),
1549            "should have session_id, got: {}",
1550            session_id
1551        );
1552    }
1553
1554    /// connection_refused is NOT retried by default (server not running).
1555    /// With max_retries=0, no retry attempt is made.
1556    #[tokio::test(flavor = "multi_thread")]
1557    async fn connection_refused_no_retry_with_zero_retries() {
1558        let lua = Lua::new();
1559        let opts = lua.create_table().expect("create opts");
1560        opts.set("provider", "ollama").expect("set provider");
1561        opts.set("base_url", "http://127.0.0.1:1")
1562            .expect("set base_url");
1563        opts.set("timeout", 1u64).expect("set timeout");
1564        opts.set("max_retries", 0u32).expect("set max_retries");
1565
1566        let start = std::time::Instant::now();
1567        let result =
1568            llm_request_impl(&lua, ("hello".into(), Some(opts))).expect("should not panic");
1569        let elapsed = start.elapsed();
1570
1571        assert!(!result.get::<bool>("ok").expect("get ok"));
1572        // Should complete quickly (no retry delay)
1573        assert!(
1574            elapsed < Duration::from_secs(5),
1575            "should not retry, elapsed: {:?}",
1576            elapsed
1577        );
1578    }
1579
1580    // ── E2E tests (require running Ollama) ──────────────────────────────
1581
1582    /// E2E: ping a real Ollama server.
1583    /// Run with: cargo test -p orcs-lua --lib llm_command::tests::e2e_ollama_ping -- --ignored --nocapture
1584    #[tokio::test(flavor = "multi_thread")]
1585    #[ignore = "requires running Ollama server"]
1586    async fn e2e_ollama_ping() {
1587        let lua = Lua::new();
1588        let opts = lua.create_table().expect("create opts");
1589        opts.set("provider", "ollama").expect("set provider");
1590        opts.set("timeout", 5u64).expect("set timeout");
1591
1592        let result = llm_ping_impl(&lua, Some(opts)).expect("should not panic");
1593
1594        let ok = result.get::<bool>("ok").expect("get ok");
1595        assert!(ok, "should succeed with running Ollama");
1596
1597        let status: u16 = result.get("status").expect("get status");
1598        assert_eq!(status, 200, "Ollama root should return 200");
1599
1600        let latency: u64 = result.get("latency_ms").expect("get latency_ms");
1601        assert!(
1602            latency < 5000,
1603            "latency should be under 5s, got: {}ms",
1604            latency
1605        );
1606
1607        let provider: String = result.get("provider").expect("get provider");
1608        assert_eq!(provider, "ollama");
1609
1610        eprintln!("[E2E] ping ok={ok} status={status} latency={latency}ms");
1611    }
1612
1613    /// E2E: single-turn call to real Ollama server.
1614    /// Run with: cargo test -p orcs-lua --lib llm_command::tests::e2e_ollama_single_turn -- --ignored --nocapture
1615    #[tokio::test(flavor = "multi_thread")]
1616    #[ignore = "requires running Ollama server"]
1617    async fn e2e_ollama_single_turn() {
1618        let lua = Lua::new();
1619        let opts = lua.create_table().expect("create opts");
1620        opts.set("provider", "ollama").expect("set provider");
1621        opts.set("model", "qwen2.5-coder:1.5b").expect("set model");
1622        opts.set("timeout", 30u64).expect("set timeout");
1623        opts.set("max_retries", 0u32).expect("set max_retries");
1624
1625        let result = llm_request_impl(&lua, ("Say exactly: HELLO_ORCS".into(), Some(opts)))
1626            .expect("should not panic");
1627
1628        let ok = result.get::<bool>("ok").expect("get ok");
1629        assert!(ok, "should succeed with running Ollama");
1630
1631        let content: String = result.get("content").expect("get content");
1632        assert!(!content.is_empty(), "content should not be empty");
1633
1634        let session_id: String = result.get("session_id").expect("get session_id");
1635        assert!(
1636            session_id.starts_with("sess-"),
1637            "should have session_id, got: {}",
1638            session_id
1639        );
1640
1641        let model: String = result.get("model").expect("get model");
1642        assert!(
1643            model.contains("qwen"),
1644            "model should contain qwen, got: {}",
1645            model
1646        );
1647
1648        eprintln!("[E2E] ok={ok} model={model} session_id={session_id}");
1649        eprintln!("[E2E] content: {content}");
1650    }
1651
1652    /// E2E: multi-turn session with real Ollama server.
1653    /// Run with: cargo test -p orcs-lua --lib llm_command::tests::e2e_ollama_multi_turn -- --ignored --nocapture
1654    #[tokio::test(flavor = "multi_thread")]
1655    #[ignore = "requires running Ollama server"]
1656    async fn e2e_ollama_multi_turn() {
1657        let lua = Lua::new();
1658
1659        // Turn 1
1660        let opts1 = lua.create_table().expect("create opts");
1661        opts1.set("provider", "ollama").expect("set provider");
1662        opts1.set("model", "qwen2.5-coder:1.5b").expect("set model");
1663        opts1.set("timeout", 30u64).expect("set timeout");
1664        opts1
1665            .set("system_prompt", "You are a helpful assistant. Be concise.")
1666            .expect("set system_prompt");
1667
1668        let r1 = llm_request_impl(
1669            &lua,
1670            (
1671                "My name is ORCS_TEST_USER. Remember it.".into(),
1672                Some(opts1),
1673            ),
1674        )
1675        .expect("turn 1 should not panic");
1676
1677        assert!(
1678            r1.get::<bool>("ok").expect("get ok"),
1679            "turn 1 should succeed"
1680        );
1681        let sid: String = r1.get("session_id").expect("get session_id");
1682        let content1: String = r1.get("content").expect("get content");
1683        eprintln!("[E2E turn 1] session={sid} content: {content1}");
1684
1685        // Turn 2: use same session
1686        let opts2 = lua.create_table().expect("create opts");
1687        opts2.set("provider", "ollama").expect("set provider");
1688        opts2.set("model", "qwen2.5-coder:1.5b").expect("set model");
1689        opts2.set("timeout", 30u64).expect("set timeout");
1690        opts2
1691            .set("session_id", sid.as_str())
1692            .expect("set session_id");
1693
1694        let r2 = llm_request_impl(&lua, ("What is my name?".into(), Some(opts2)))
1695            .expect("turn 2 should not panic");
1696
1697        assert!(
1698            r2.get::<bool>("ok").expect("get ok"),
1699            "turn 2 should succeed"
1700        );
1701        let sid2: String = r2.get("session_id").expect("get session_id");
1702        assert_eq!(sid, sid2, "session_id should be preserved across turns");
1703
1704        let content2: String = r2.get("content").expect("get content");
1705        eprintln!("[E2E turn 2] content: {content2}");
1706
1707        // Verify session store has history
1708        let store = lua
1709            .app_data_ref::<SessionStore>()
1710            .expect("store should exist");
1711        let history = store.0.get(&sid).expect("session should exist");
1712        // Turn 1: user + assistant = 2, Turn 2: user + assistant = 2, total = 4
1713        assert_eq!(
1714            history.len(),
1715            4,
1716            "session should have 4 messages (2 turns), got: {}",
1717            history.len()
1718        );
1719    }
1720}