Skip to main content

algocline_app/
service.rs

1use std::path::{Path, PathBuf};
2use std::sync::Arc;
3
4use algocline_core::{ExecutionMetrics, QueryId};
5use algocline_engine::{Executor, FeedResult, SessionRegistry};
6
7// ─── Transcript logging ─────────────────────────────────────────
8
9/// Controls transcript log output.
10///
11/// - `ALC_LOG_DIR`: Directory for log files. Default: `~/.algocline/logs`.
12/// - `ALC_LOG_LEVEL`: `full` (default) or `off`.
13#[derive(Clone, Debug)]
14pub struct TranscriptConfig {
15    pub dir: PathBuf,
16    pub enabled: bool,
17}
18
19impl TranscriptConfig {
20    /// Build from environment variables.
21    pub fn from_env() -> Self {
22        let dir = std::env::var("ALC_LOG_DIR")
23            .map(PathBuf::from)
24            .unwrap_or_else(|_| {
25                dirs::home_dir()
26                    .unwrap_or_else(|| PathBuf::from("."))
27                    .join(".algocline")
28                    .join("logs")
29            });
30
31        let enabled = std::env::var("ALC_LOG_LEVEL")
32            .map(|v| v.to_lowercase() != "off")
33            .unwrap_or(true);
34
35        Self { dir, enabled }
36    }
37}
38
39/// Write transcript log to `{dir}/{session_id}.json`.
40///
41/// Silently returns on I/O errors — logging must not break execution.
42fn write_transcript_log(config: &TranscriptConfig, session_id: &str, metrics: &ExecutionMetrics) {
43    if !config.enabled {
44        return;
45    }
46
47    let transcript = metrics.transcript_to_json();
48    if transcript.is_empty() {
49        return;
50    }
51
52    let stats = metrics.to_json();
53
54    // Extract task hint from first prompt (truncated to 100 chars)
55    let task_hint = transcript
56        .first()
57        .and_then(|e| e.get("prompt"))
58        .and_then(|p| p.as_str())
59        .map(|s| {
60            if s.len() <= 100 {
61                s.to_string()
62            } else {
63                // Find a char boundary at or before 100 bytes
64                let mut end = 100;
65                while end > 0 && !s.is_char_boundary(end) {
66                    end -= 1;
67                }
68                format!("{}...", &s[..end])
69            }
70        });
71
72    let auto_stats = &stats["auto"];
73
74    let log_entry = serde_json::json!({
75        "session_id": session_id,
76        "task_hint": task_hint,
77        "stats": auto_stats,
78        "transcript": transcript,
79    });
80
81    if std::fs::create_dir_all(&config.dir).is_err() {
82        return;
83    }
84
85    let path = match ContainedPath::child(&config.dir, &format!("{session_id}.json")) {
86        Ok(p) => p,
87        Err(_) => return,
88    };
89    let content = match serde_json::to_string_pretty(&log_entry) {
90        Ok(s) => s,
91        Err(_) => return,
92    };
93
94    let _ = std::fs::write(&path, content);
95
96    // Write lightweight meta file for log_list (avoids reading full transcript)
97    let meta = serde_json::json!({
98        "session_id": session_id,
99        "task_hint": task_hint,
100        "elapsed_ms": auto_stats.get("elapsed_ms"),
101        "rounds": auto_stats.get("rounds"),
102        "llm_calls": auto_stats.get("llm_calls"),
103        "notes_count": 0,
104    });
105    if let Ok(meta_path) = ContainedPath::child(&config.dir, &format!("{session_id}.meta.json")) {
106        let _ = serde_json::to_string(&meta).map(|s| std::fs::write(&meta_path, s));
107    }
108}
109
110/// Append a note to an existing log file.
111///
112/// Reads `{dir}/{session_id}.json`, adds the note to `"notes"` array, writes back.
113/// Returns Ok with the note count, or Err if the log file doesn't exist.
114fn append_note(
115    dir: &Path,
116    session_id: &str,
117    content: &str,
118    title: Option<&str>,
119) -> Result<usize, String> {
120    let path = ContainedPath::child(dir, &format!("{session_id}.json"))?;
121    if !path.as_ref().exists() {
122        return Err(format!("Log file not found for session '{session_id}'"));
123    }
124
125    let raw = std::fs::read_to_string(&path).map_err(|e| format!("Failed to read log: {e}"))?;
126    let mut doc: serde_json::Value =
127        serde_json::from_str(&raw).map_err(|e| format!("Failed to parse log: {e}"))?;
128
129    let timestamp = {
130        use std::time::{SystemTime, UNIX_EPOCH};
131        SystemTime::now()
132            .duration_since(UNIX_EPOCH)
133            .unwrap_or_default()
134            .as_secs()
135    };
136
137    let note = serde_json::json!({
138        "timestamp": timestamp,
139        "title": title,
140        "content": content,
141    });
142
143    let notes = doc
144        .as_object_mut()
145        .ok_or("Log file is not a JSON object")?
146        .entry("notes")
147        .or_insert_with(|| serde_json::json!([]));
148
149    let arr = notes
150        .as_array_mut()
151        .ok_or("'notes' field is not an array")?;
152    arr.push(note);
153    let count = arr.len();
154
155    let output =
156        serde_json::to_string_pretty(&doc).map_err(|e| format!("Failed to serialize: {e}"))?;
157    std::fs::write(path.as_ref(), output).map_err(|e| format!("Failed to write log: {e}"))?;
158
159    // Update notes_count in meta file (best-effort)
160    if let Ok(meta_path) = ContainedPath::child(dir, &format!("{session_id}.meta.json")) {
161        if meta_path.as_ref().exists() {
162            if let Ok(raw) = std::fs::read_to_string(&meta_path) {
163                if let Ok(mut meta) = serde_json::from_str::<serde_json::Value>(&raw) {
164                    meta["notes_count"] = serde_json::json!(count);
165                    if let Ok(s) = serde_json::to_string(&meta) {
166                        let _ = std::fs::write(&meta_path, s);
167                    }
168                }
169            }
170        }
171    }
172
173    Ok(count)
174}
175
176// ─── Helpers ────────────────────────────────────────────────────
177
/// Recursively copy a directory tree (follows symlinks).
///
/// Creates `dst` (and any missing parents), then copies every entry of `src`
/// into it. Directories — including symlinks that point at directories — are
/// copied recursively; everything else is copied with [`std::fs::copy`].
///
/// # Errors
///
/// Returns the first I/O error encountered; entries copied before the error
/// are left in place.
///
/// Note: symlink cycles are not detected. NOTE(review): a cycle presumably
/// ends with an OS path-length/loop error rather than looping forever —
/// confirm if untrusted trees are ever copied.
fn copy_dir(src: &Path, dst: &Path) -> std::io::Result<()> {
    std::fs::create_dir_all(dst)?;
    for entry in std::fs::read_dir(src)? {
        let entry = entry?;
        let source = entry.path();
        let target = dst.join(entry.file_name());
        // `DirEntry::metadata()` does NOT traverse symlinks, so a symlinked
        // directory would be misclassified as a file and `fs::copy` would fail.
        // `fs::metadata` resolves the link and reports the target's type.
        if std::fs::metadata(&source)?.is_dir() {
            copy_dir(&source, &target)?;
        } else {
            std::fs::copy(&source, &target)?;
        }
    }
    Ok(())
}
194
195// ─── Path safety ────────────────────────────────────────────────
196
/// A path verified to reside within a base directory.
///
/// Constructed via [`ContainedPath::child`], which rejects path traversal
/// (`..`, absolute paths, empty names, symlink escapes). Once constructed,
/// the inner path is safe for filesystem operations within the base directory.
///
/// The check is performed at construction time only; it does not protect
/// against the filesystem being changed afterwards.
#[derive(Debug)]
struct ContainedPath(PathBuf);

impl ContainedPath {
    /// Resolve `name` as a child of `base`, rejecting traversal attempts.
    ///
    /// Validation steps:
    ///
    /// 1. Every component of `name` must be [`std::path::Component::Normal`]
    ///    (no `..`, `.`, root or prefix), and there must be at least one
    ///    component — an empty name would otherwise resolve to `base` itself.
    /// 2. The deepest already-existing part of the joined path that lies
    ///    strictly below `base` is canonicalized and must still be inside the
    ///    canonical `base`. Checking the deepest existing ancestor (rather than
    ///    only the full path) also catches a symlinked parent directory when
    ///    the final file does not exist yet, and `symlink_metadata` makes
    ///    dangling symlinks count as "existing" so they are rejected instead
    ///    of being written through.
    ///
    /// # Errors
    ///
    /// Returns a human-readable message when `name` is invalid, when
    /// canonicalization fails, or when the resolved location escapes `base`.
    fn child(base: &Path, name: &str) -> Result<Self, String> {
        let mut depth = 0usize;
        for comp in Path::new(name).components() {
            if !matches!(comp, std::path::Component::Normal(_)) {
                return Err(format!(
                    "Invalid path component in '{name}': path traversal detected"
                ));
            }
            depth += 1;
        }
        if depth == 0 {
            return Err(format!("Invalid path component in '{name}': empty path"));
        }

        let path = base.join(name);

        // The first `depth` ancestors are exactly the ones strictly below `base`
        // (the path itself, its parent, ...); `base` and above are not inspected.
        let deepest_existing = path
            .ancestors()
            .take(depth)
            .find(|candidate| candidate.symlink_metadata().is_ok());

        if let Some(existing) = deepest_existing {
            let canonical = existing
                .canonicalize()
                .map_err(|e| format!("Path resolution failed: {e}"))?;
            let base_canonical = base
                .canonicalize()
                .map_err(|e| format!("Base path resolution failed: {e}"))?;
            if !canonical.starts_with(&base_canonical) {
                return Err(format!("Path '{name}' escapes base directory"));
            }
        }
        Ok(Self(path))
    }
}

impl std::ops::Deref for ContainedPath {
    type Target = Path;

    /// Borrow the validated path.
    fn deref(&self) -> &Path {
        &self.0
    }
}

impl AsRef<Path> for ContainedPath {
    /// Borrow the validated path, so the value can be handed to `std::fs` APIs.
    fn as_ref(&self) -> &Path {
        &self.0
    }
}
247
248// ─── Parameter types (MCP-independent) ──────────────────────────
249
/// A single query response in a batch feed.
///
/// Pairs the identifier of a pending query with the text that answers it.
/// The `query_id` string is parsed into a `QueryId` when the response is fed
/// back into the session.
#[derive(Debug)]
pub struct QueryResponse {
    /// Query ID (e.g. "q-0", "q-1").
    pub query_id: String,
    /// The host LLM's response for this query.
    pub response: String,
}
258
259// ─── Code resolution ────────────────────────────────────────────
260
/// Pick the Lua source to run from either an inline string or a file path.
///
/// Exactly one of `code` / `code_file` must be given:
///
/// - only `code`: returned as-is;
/// - only `code_file`: the file is read and its contents returned;
/// - both or neither: an error message is returned.
pub(crate) fn resolve_code(
    code: Option<String>,
    code_file: Option<String>,
) -> Result<String, String> {
    if code.is_some() && code_file.is_some() {
        return Err("Provide either `code` or `code_file`, not both.".into());
    }
    if let Some(inline) = code {
        return Ok(inline);
    }
    let Some(path) = code_file else {
        return Err("Either `code` or `code_file` must be provided.".into());
    };
    std::fs::read_to_string(Path::new(&path)).map_err(|e| format!("Failed to read {path}: {e}"))
}
273
/// Produce the Lua snippet that `require`s package `name` and returns
/// `pkg.run(ctx)`.
///
/// # Security: `name` is not sanitized
///
/// `name` is placed verbatim inside the Lua `require()` call. That is a
/// deliberate choice in the current architecture:
///
/// - algocline is a **local development/execution tool** running Lua in the
///   user's own environment via mlua (not a multi-tenant service).
/// - The same caller can use `alc_run`, which executes **arbitrary Lua
///   code**, so sanitizing `name` here would not shrink the attack surface.
/// - The MCP trust boundary sits at the **host/client** level — the host
///   decides whether `alc_advice` is invoked at all.
///
/// Should algocline ever become a shared backend (e.g. a package registry
/// server accepting untrusted strategy names), `name` **must** be validated
/// (allowlist of `[a-zA-Z0-9_-]` or equivalent) before interpolation.
///
/// References:
/// - [MCP Security Best Practices — Local MCP Server Compromise](https://modelcontextprotocol.io/specification/draft/basic/security_best_practices)
/// - [OWASP MCP Security Cheat Sheet](https://cheatsheetseries.owasp.org/cheatsheets/MCP_Security_Cheat_Sheet.html)
pub(crate) fn make_require_code(name: &str) -> String {
    let mut lua = String::from("local pkg = require(\"");
    lua.push_str(name);
    lua.push_str("\")\nreturn pkg.run(ctx)");
    lua
}
301
302pub(crate) fn packages_dir() -> Result<PathBuf, String> {
303    let home = dirs::home_dir().ok_or("Cannot determine home directory")?;
304    Ok(home.join(".algocline").join("packages"))
305}
306
/// Git repositories used for auto-installation. A collection repo holds
/// several packages as subdirectories; a single-package repo keeps its
/// `init.lua` at the repository root.
const AUTO_INSTALL_SOURCES: &[&str] = &[
    "https://github.com/ynishi/algocline-bundled-packages",
    "https://github.com/ynishi/evalframe",
];

/// System packages: installed next to user packages but not offered as
/// user-facing strategies. They are left out of `pkg_list` and are not loaded
/// via `require` for meta extraction.
const SYSTEM_PACKAGES: &[&str] = &["evalframe"];

/// Report whether `name` is one of the system (non-user-facing) packages.
fn is_system_package(name: &str) -> bool {
    SYSTEM_PACKAGES.iter().any(|&pkg| pkg == name)
}
322
323/// Check whether a package is installed (has `init.lua`).
324fn is_package_installed(name: &str) -> bool {
325    packages_dir()
326        .map(|dir| dir.join(name).join("init.lua").exists())
327        .unwrap_or(false)
328}
329
330// ─── Eval Result Store ──────────────────────────────────────────
331
332fn evals_dir() -> Result<PathBuf, String> {
333    let home = dirs::home_dir().ok_or("Cannot determine home directory")?;
334    Ok(home.join(".algocline").join("evals"))
335}
336
337/// Persist eval result to `~/.algocline/evals/{strategy}_{timestamp}.json`.
338///
339/// Silently returns on I/O errors — storage must not break eval execution.
340fn save_eval_result(strategy: &str, result_json: &str) {
341    let dir = match evals_dir() {
342        Ok(d) => d,
343        Err(_) => return,
344    };
345    if std::fs::create_dir_all(&dir).is_err() {
346        return;
347    }
348
349    let now = std::time::SystemTime::now()
350        .duration_since(std::time::UNIX_EPOCH)
351        .unwrap_or_default();
352    let timestamp = now.as_secs();
353    let eval_id = format!("{strategy}_{timestamp}");
354
355    // Parse result to extract summary fields for meta file
356    let parsed: serde_json::Value = match serde_json::from_str(result_json) {
357        Ok(v) => v,
358        Err(_) => return,
359    };
360
361    // Write full result
362    let path = match ContainedPath::child(&dir, &format!("{eval_id}.json")) {
363        Ok(p) => p,
364        Err(_) => return,
365    };
366    let _ = std::fs::write(&path, result_json);
367
368    // Write lightweight meta file for listing
369    let result_obj = parsed.get("result");
370    let stats_obj = parsed.get("stats");
371    let aggregated = result_obj.and_then(|r| r.get("aggregated"));
372
373    let meta = serde_json::json!({
374        "eval_id": eval_id,
375        "strategy": strategy,
376        "timestamp": timestamp,
377        "pass_rate": aggregated.and_then(|a| a.get("pass_rate")),
378        "mean_score": aggregated.and_then(|a| a.get("scores")).and_then(|s| s.get("mean")),
379        "total_cases": aggregated.and_then(|a| a.get("total")),
380        "passed": aggregated.and_then(|a| a.get("passed")),
381        "llm_calls": stats_obj.and_then(|s| s.get("auto")).and_then(|a| a.get("llm_calls")),
382        "elapsed_ms": stats_obj.and_then(|s| s.get("auto")).and_then(|a| a.get("elapsed_ms")),
383        "summary": result_obj.and_then(|r| r.get("summary")),
384    });
385
386    if let Ok(meta_path) = ContainedPath::child(&dir, &format!("{eval_id}.meta.json")) {
387        let _ = serde_json::to_string(&meta).map(|s| std::fs::write(&meta_path, s));
388    }
389}
390
391// ─── Eval Comparison Helpers ─────────────────────────────────────
392
/// Make `s` safe to embed inside a single-quoted (`'...'`) Lua string literal.
///
/// Backslash, single quote, newline and carriage return — the characters
/// that would terminate or alter such a literal — are replaced by their Lua
/// escape sequences; every other character is copied unchanged.
fn escape_for_lua_sq(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '\'' => escaped.push_str("\\'"),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            other => escaped.push(other),
        }
    }
    escaped
}
403
/// Recover the strategy name from an eval ID of the form
/// `"{strategy}_{timestamp}"`.
///
/// Everything before the last `_` is returned; `None` when the ID contains
/// no underscore.
fn extract_strategy_from_id(eval_id: &str) -> Option<&str> {
    let split_at = eval_id.rfind('_')?;
    Some(&eval_id[..split_at])
}
408
409/// Persist a comparison result to `~/.algocline/evals/`.
410fn save_compare_result(eval_id_a: &str, eval_id_b: &str, result_json: &str) {
411    let dir = match evals_dir() {
412        Ok(d) => d,
413        Err(_) => return,
414    };
415    let filename = format!("compare_{eval_id_a}_vs_{eval_id_b}.json");
416    if let Ok(path) = ContainedPath::child(&dir, &filename) {
417        let _ = std::fs::write(&path, result_json);
418    }
419}
420
421// ─── Application Service ────────────────────────────────────────
422
/// Tracks which sessions are eval sessions and their strategy name.
///
/// Keyed by session ID; the value is the strategy name the eval was started
/// with. Guarded by a standard (blocking) mutex.
type EvalSessions = std::sync::Mutex<std::collections::HashMap<String, String>>;

/// Application-level service bundling the executor, the session registry,
/// the transcript-log configuration and the eval-session bookkeeping.
///
/// Cloning is cheap for the shared parts: `executor`, `registry` and
/// `eval_sessions` are behind `Arc`, so clones refer to the same instances.
#[derive(Clone)]
pub struct AppService {
    // Shared executor handed in by the caller of `new`.
    executor: Arc<Executor>,
    // Registry used to feed responses into paused sessions.
    registry: Arc<SessionRegistry>,
    // Where (and whether) transcript logs are written.
    log_config: TranscriptConfig,
    /// session_id → strategy name for eval sessions (cleared on completion).
    eval_sessions: Arc<EvalSessions>,
}
434
435impl AppService {
436    pub fn new(executor: Arc<Executor>, log_config: TranscriptConfig) -> Self {
437        Self {
438            executor,
439            registry: Arc::new(SessionRegistry::new()),
440            log_config,
441            eval_sessions: Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())),
442        }
443    }
444
445    /// Execute Lua code with optional JSON context.
446    pub async fn run(
447        &self,
448        code: Option<String>,
449        code_file: Option<String>,
450        ctx: Option<serde_json::Value>,
451    ) -> Result<String, String> {
452        let code = resolve_code(code, code_file)?;
453        let ctx = ctx.unwrap_or(serde_json::Value::Null);
454        self.start_and_tick(code, ctx).await
455    }
456
457    /// Apply a built-in strategy to a task.
458    ///
459    /// If the requested package is not installed, automatically installs the
460    /// bundled package collection from GitHub before executing.
461    pub async fn advice(
462        &self,
463        strategy: &str,
464        task: String,
465        opts: Option<serde_json::Value>,
466    ) -> Result<String, String> {
467        // Auto-install bundled packages if the requested strategy is missing
468        if !is_package_installed(strategy) {
469            self.auto_install_bundled_packages().await?;
470            if !is_package_installed(strategy) {
471                return Err(format!(
472                    "Package '{strategy}' not found after installing bundled collection. \
473                     Use alc_pkg_install to install it manually."
474                ));
475            }
476        }
477
478        let code = make_require_code(strategy);
479
480        let mut ctx_map = match opts {
481            Some(serde_json::Value::Object(m)) => m,
482            _ => serde_json::Map::new(),
483        };
484        ctx_map.insert("task".into(), serde_json::Value::String(task));
485        let ctx = serde_json::Value::Object(ctx_map);
486
487        self.start_and_tick(code, ctx).await
488    }
489
490    /// Run an evalframe evaluation suite.
491    ///
492    /// Accepts a scenario (bindings + cases) and a strategy name.
493    /// Automatically wires the strategy as the provider and executes
494    /// the evalframe suite, returning the report (summary, scores, failures).
495    ///
496    /// Injects a `std` global (mlua-batteries compatible shim) so evalframe's
497    /// `std.lua` can resolve json/fs/time from algocline's built-in primitives.
498    ///
499    /// # Security: `strategy` is not sanitized
500    ///
501    /// `strategy` is interpolated into a Lua string literal without escaping.
502    /// This is intentional — same rationale as [`make_require_code`]:
503    /// algocline runs Lua in the caller's own process with full ambient
504    /// authority, so Lua injection does not cross a trust boundary.
505    pub async fn eval(
506        &self,
507        scenario: Option<String>,
508        scenario_file: Option<String>,
509        strategy: &str,
510        strategy_opts: Option<serde_json::Value>,
511    ) -> Result<String, String> {
512        // Auto-install bundled packages if evalframe is missing
513        if !is_package_installed("evalframe") {
514            self.auto_install_bundled_packages().await?;
515            if !is_package_installed("evalframe") {
516                return Err(
517                    "Package 'evalframe' not found after installing bundled collection. \
518                     Use alc_pkg_install to install it manually."
519                        .into(),
520                );
521            }
522        }
523
524        let scenario_code = resolve_code(scenario, scenario_file)?;
525
526        // Build strategy opts Lua table literal
527        let opts_lua = match &strategy_opts {
528            Some(v) if !v.is_null() => format!("alc.json_decode('{}')", v),
529            _ => "{}".to_string(),
530        };
531
532        // Inject `std` global as a mlua-batteries compatible shim.
533        //
534        // evalframe.std expects the host to provide a `std` global with:
535        //   std.json.decode/encode  — JSON serialization
536        //   std.fs.read/is_file     — filesystem access
537        //   std.time.now            — wall-clock time (epoch seconds, f64)
538        //
539        // We bridge these from algocline's alc.* primitives and Lua's io stdlib.
540        let wrapped = format!(
541            r#"
542std = {{
543  json = {{
544    decode = alc.json_decode,
545    encode = alc.json_encode,
546  }},
547  fs = {{
548    read = function(path)
549      local f, err = io.open(path, "r")
550      if not f then error("std.fs.read: " .. (err or path), 2) end
551      local content = f:read("*a")
552      f:close()
553      return content
554    end,
555    is_file = function(path)
556      local f = io.open(path, "r")
557      if f then f:close(); return true end
558      return false
559    end,
560  }},
561  time = {{
562    now = alc.time,
563  }},
564}}
565
566local ef = require("evalframe")
567
568-- Load scenario (bindings + cases, no provider)
569local spec = (function()
570{scenario_code}
571end)()
572
573-- Inject strategy as provider
574spec.provider = ef.providers.algocline {{
575  strategy = "{strategy}",
576  opts = {opts_lua},
577}}
578
579-- Build and run suite
580local s = ef.suite "eval" (spec)
581local report = s:run()
582return report:to_table()
583"#
584        );
585
586        let ctx = serde_json::Value::Null;
587        let result = self.start_and_tick(wrapped, ctx).await?;
588
589        // Register this session for eval result saving on completion.
590        // start_and_tick returns the first pause (needs_response) or completed.
591        // If completed immediately, save now. Otherwise, save when continue_* finishes.
592        if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&result) {
593            match parsed.get("status").and_then(|s| s.as_str()) {
594                Some("completed") => {
595                    save_eval_result(strategy, &result);
596                }
597                Some("needs_response") => {
598                    if let Some(sid) = parsed.get("session_id").and_then(|s| s.as_str()) {
599                        if let Ok(mut map) = self.eval_sessions.lock() {
600                            map.insert(sid.to_string(), strategy.to_string());
601                        }
602                    }
603                }
604                _ => {}
605            }
606        }
607
608        Ok(result)
609    }
610
611    /// List eval history, optionally filtered by strategy.
612    pub fn eval_history(&self, strategy: Option<&str>, limit: usize) -> Result<String, String> {
613        let evals_dir = evals_dir()?;
614        if !evals_dir.exists() {
615            return Ok(serde_json::json!({ "evals": [] }).to_string());
616        }
617
618        let mut entries: Vec<serde_json::Value> = Vec::new();
619
620        let read_dir =
621            std::fs::read_dir(&evals_dir).map_err(|e| format!("Failed to read evals dir: {e}"))?;
622
623        for entry in read_dir.flatten() {
624            let path = entry.path();
625            if path.extension().and_then(|e| e.to_str()) != Some("json") {
626                continue;
627            }
628            // Skip meta files
629            if path
630                .file_name()
631                .and_then(|n| n.to_str())
632                .is_some_and(|n| n.contains(".meta."))
633            {
634                continue;
635            }
636
637            // Read meta file (lightweight) if it exists.
638            // Derive meta filename from the result filename to stay within evals_dir
639            // (ContainedPath ensures no traversal).
640            let stem = match path.file_stem().and_then(|s| s.to_str()) {
641                Some(s) => s,
642                None => continue,
643            };
644            let meta_path = match ContainedPath::child(&evals_dir, &format!("{stem}.meta.json")) {
645                Ok(p) => p,
646                Err(_) => continue,
647            };
648            let meta = if meta_path.exists() {
649                std::fs::read_to_string(&*meta_path)
650                    .ok()
651                    .and_then(|s| serde_json::from_str::<serde_json::Value>(&s).ok())
652            } else {
653                None
654            };
655
656            if let Some(meta) = meta {
657                // Filter by strategy if specified
658                if let Some(filter) = strategy {
659                    if meta.get("strategy").and_then(|s| s.as_str()) != Some(filter) {
660                        continue;
661                    }
662                }
663                entries.push(meta);
664            }
665        }
666
667        // Sort by timestamp descending (newest first)
668        entries.sort_by(|a, b| {
669            let ts_a = a
670                .get("timestamp")
671                .and_then(serde_json::Value::as_u64)
672                .unwrap_or(0);
673            let ts_b = b
674                .get("timestamp")
675                .and_then(serde_json::Value::as_u64)
676                .unwrap_or(0);
677            ts_b.cmp(&ts_a)
678        });
679        entries.truncate(limit);
680
681        Ok(serde_json::json!({ "evals": entries }).to_string())
682    }
683
684    /// View a specific eval result by ID.
685    pub fn eval_detail(&self, eval_id: &str) -> Result<String, String> {
686        let evals_dir = evals_dir()?;
687        let path = ContainedPath::child(&evals_dir, &format!("{eval_id}.json"))
688            .map_err(|e| format!("Invalid eval_id: {e}"))?;
689        if !path.exists() {
690            return Err(format!("Eval result not found: {eval_id}"));
691        }
692        std::fs::read_to_string(&*path).map_err(|e| format!("Failed to read eval: {e}"))
693    }
694
    /// Compare two eval results with statistical significance testing.
    ///
    /// Delegates to evalframe's `stats.welch_t` (single source of truth for
    /// t-distribution table and test logic). Reads persisted `aggregated.scores`
    /// from each eval result — no re-computation of descriptive statistics.
    ///
    /// The comparison result is persisted to `~/.algocline/evals/` so repeated
    /// lookups of the same pair are file reads only.
    ///
    /// Flow: return the cached `compare_{a}_vs_{b}.json` if present; otherwise
    /// make sure `evalframe` is installed, load both stored results, run a
    /// generated Lua snippet over them, persist the output and return it.
    ///
    /// Note that the cache key is order-sensitive: `(a, b)` and `(b, a)` are
    /// stored under different file names.
    ///
    /// # Errors
    ///
    /// Fails when a cached file cannot be read, when `evalframe` is unavailable
    /// after auto-install, when either eval result cannot be loaded, or when
    /// `start_and_tick` reports an error.
    pub async fn eval_compare(&self, eval_id_a: &str, eval_id_b: &str) -> Result<String, String> {
        // Check for cached comparison
        let cache_filename = format!("compare_{eval_id_a}_vs_{eval_id_b}.json");
        // Failures to locate the cache are not errors; only a cache file that
        // exists but cannot be read is reported.
        if let Ok(dir) = evals_dir() {
            if let Ok(cached_path) = ContainedPath::child(&dir, &cache_filename) {
                if cached_path.exists() {
                    return std::fs::read_to_string(&*cached_path)
                        .map_err(|e| format!("Failed to read cached comparison: {e}"));
                }
            }
        }

        // Auto-install bundled packages if evalframe is missing
        if !is_package_installed("evalframe") {
            self.auto_install_bundled_packages().await?;
            if !is_package_installed("evalframe") {
                return Err(
                    "Package 'evalframe' not found after installing bundled collection. \
                     Use alc_pkg_install to install it manually."
                        .into(),
                );
            }
        }

        // Raw JSON text of both stored results (also validates the IDs).
        let result_a = self.eval_detail(eval_id_a)?;
        let result_b = self.eval_detail(eval_id_b)?;

        // Build Lua snippet that uses evalframe's stats module
        // to compute welch_t from the persisted aggregated scores.
        //
        // The stored JSON is embedded in single-quoted Lua literals (escaped via
        // escape_for_lua_sq); the eval IDs and fallback strategy names are
        // embedded in double-quoted literals without escaping.
        let lua_code = format!(
            r#"
std = {{
  json = {{
    decode = alc.json_decode,
    encode = alc.json_encode,
  }},
  fs = {{ read = function() end, is_file = function() return false end }},
  time = {{ now = alc.time }},
}}

local stats = require("evalframe.eval.stats")

local result_a = alc.json_decode('{result_a_escaped}')
local result_b = alc.json_decode('{result_b_escaped}')

local agg_a = result_a.result and result_a.result.aggregated
local agg_b = result_b.result and result_b.result.aggregated

if not agg_a or not agg_a.scores then
  error("No aggregated scores in {eval_id_a}")
end
if not agg_b or not agg_b.scores then
  error("No aggregated scores in {eval_id_b}")
end

local welch = stats.welch_t(agg_a.scores, agg_b.scores)

local strategy_a = (result_a.result and result_a.result.name) or "{strategy_a_fallback}"
local strategy_b = (result_b.result and result_b.result.name) or "{strategy_b_fallback}"

local delta = agg_a.scores.mean - agg_b.scores.mean
local winner = "none"
if welch.significant then
  winner = delta > 0 and "a" or "b"
end

-- Build summary text
local parts = {{}}
if welch.significant then
  local w, l, d = strategy_a, strategy_b, delta
  if delta < 0 then w, l, d = strategy_b, strategy_a, -delta end
  parts[#parts + 1] = string.format(
    "%s outperforms %s by %.4f (mean score), statistically significant (t=%.3f, df=%.1f).",
    w, l, d, math.abs(welch.t_stat), welch.df
  )
else
  parts[#parts + 1] = string.format(
    "No statistically significant difference between %s and %s (t=%.3f, df=%.1f).",
    strategy_a, strategy_b, math.abs(welch.t_stat), welch.df
  )
end
if agg_a.pass_rate and agg_b.pass_rate then
  local dp = agg_a.pass_rate - agg_b.pass_rate
  if math.abs(dp) > 1e-9 then
    local h = dp > 0 and strategy_a or strategy_b
    parts[#parts + 1] = string.format("Pass rate: %s +%.1fpp.", h, math.abs(dp) * 100)
  else
    parts[#parts + 1] = string.format("Pass rate: identical (%.1f%%).", agg_a.pass_rate * 100)
  end
end

return {{
  a = {{
    eval_id = "{eval_id_a}",
    strategy = strategy_a,
    scores = agg_a.scores,
    pass_rate = agg_a.pass_rate,
    pass_at_1 = agg_a.pass_at_1,
    ci_95 = agg_a.ci_95,
  }},
  b = {{
    eval_id = "{eval_id_b}",
    strategy = strategy_b,
    scores = agg_b.scores,
    pass_rate = agg_b.pass_rate,
    pass_at_1 = agg_b.pass_at_1,
    ci_95 = agg_b.ci_95,
  }},
  comparison = {{
    delta_mean = delta,
    welch_t = {{
      t_stat = welch.t_stat,
      df = welch.df,
      significant = welch.significant,
      direction = welch.direction,
    }},
    winner = winner,
    summary = table.concat(parts, " "),
  }},
}}
"#,
            result_a_escaped = escape_for_lua_sq(&result_a),
            result_b_escaped = escape_for_lua_sq(&result_b),
            eval_id_a = eval_id_a,
            eval_id_b = eval_id_b,
            // Used only when the stored result carries no `result.name`:
            // the eval ID minus its trailing `_{timestamp}` part, else "A"/"B".
            strategy_a_fallback = extract_strategy_from_id(eval_id_a).unwrap_or("A"),
            strategy_b_fallback = extract_strategy_from_id(eval_id_b).unwrap_or("B"),
        );

        // The snippet needs no caller-supplied context.
        let ctx = serde_json::Value::Null;
        let raw_result = self.start_and_tick(lua_code, ctx).await?;

        // Persist comparison result
        save_compare_result(eval_id_a, eval_id_b, &raw_result);

        Ok(raw_result)
    }
840
841    /// Continue a paused execution — batch feed.
842    pub async fn continue_batch(
843        &self,
844        session_id: &str,
845        responses: Vec<QueryResponse>,
846    ) -> Result<String, String> {
847        let mut last_result = None;
848        for qr in responses {
849            let qid = QueryId::parse(&qr.query_id);
850            let result = self
851                .registry
852                .feed_response(session_id, &qid, qr.response)
853                .await
854                .map_err(|e| format!("Continue failed: {e}"))?;
855            last_result = Some(result);
856        }
857        let result = last_result.ok_or("Empty responses array")?;
858        self.maybe_log_transcript(&result, session_id);
859        let json = result.to_json(session_id).to_string();
860        self.maybe_save_eval(&result, session_id, &json);
861        Ok(json)
862    }
863
864    /// Continue a paused execution — single response (with optional query_id).
865    pub async fn continue_single(
866        &self,
867        session_id: &str,
868        response: String,
869        query_id: Option<&str>,
870    ) -> Result<String, String> {
871        let query_id = match query_id {
872            Some(qid) => QueryId::parse(qid),
873            None => QueryId::single(),
874        };
875
876        let result = self
877            .registry
878            .feed_response(session_id, &query_id, response)
879            .await
880            .map_err(|e| format!("Continue failed: {e}"))?;
881
882        self.maybe_log_transcript(&result, session_id);
883        let json = result.to_json(session_id).to_string();
884        self.maybe_save_eval(&result, session_id, &json);
885        Ok(json)
886    }
887
888    // ─── Package Management ─────────────────────────────────────
889
890    /// List installed packages with metadata.
891    pub async fn pkg_list(&self) -> Result<String, String> {
892        let pkg_dir = packages_dir()?;
893        if !pkg_dir.is_dir() {
894            return Ok(serde_json::json!({ "packages": [] }).to_string());
895        }
896
897        let mut packages = Vec::new();
898        let entries =
899            std::fs::read_dir(&pkg_dir).map_err(|e| format!("Failed to read packages dir: {e}"))?;
900
901        for entry in entries.flatten() {
902            let path = entry.path();
903            if !path.is_dir() {
904                continue;
905            }
906            let init_lua = path.join("init.lua");
907            if !init_lua.exists() {
908                continue;
909            }
910            let name = entry.file_name().to_string_lossy().to_string();
911            // Skip system packages (not user-facing strategies)
912            if is_system_package(&name) {
913                continue;
914            }
915            let code = format!(
916                r#"local pkg = require("{name}")
917return pkg.meta or {{ name = "{name}" }}"#
918            );
919            match self.executor.eval_simple(code).await {
920                Ok(meta) => packages.push(meta),
921                Err(_) => {
922                    packages
923                        .push(serde_json::json!({ "name": name, "error": "failed to load meta" }));
924                }
925            }
926        }
927
928        Ok(serde_json::json!({ "packages": packages }).to_string())
929    }
930
931    /// Install a package from a Git URL or local path.
932    pub async fn pkg_install(&self, url: String, name: Option<String>) -> Result<String, String> {
933        let pkg_dir = packages_dir()?;
934        let _ = std::fs::create_dir_all(&pkg_dir);
935
936        // Local path: copy directly (supports uncommitted/dirty working trees)
937        let local_path = Path::new(&url);
938        if local_path.is_absolute() && local_path.is_dir() {
939            return self.install_from_local_path(local_path, &pkg_dir, name);
940        }
941
942        // Normalize URL: add https:// only for bare domain-style URLs
943        let git_url = if url.starts_with("http://")
944            || url.starts_with("https://")
945            || url.starts_with("file://")
946            || url.starts_with("git@")
947        {
948            url.clone()
949        } else {
950            format!("https://{url}")
951        };
952
953        // Clone to temp directory first to detect single vs collection
954        let staging = tempfile::tempdir().map_err(|e| format!("Failed to create temp dir: {e}"))?;
955
956        let output = tokio::process::Command::new("git")
957            .args([
958                "clone",
959                "--depth",
960                "1",
961                &git_url,
962                &staging.path().to_string_lossy(),
963            ])
964            .output()
965            .await
966            .map_err(|e| format!("Failed to run git: {e}"))?;
967
968        if !output.status.success() {
969            let stderr = String::from_utf8_lossy(&output.stderr);
970            return Err(format!("git clone failed: {stderr}"));
971        }
972
973        // Remove .git dir from staging
974        let _ = std::fs::remove_dir_all(staging.path().join(".git"));
975
976        // Detect: single package (init.lua at root) vs collection (subdirs with init.lua)
977        if staging.path().join("init.lua").exists() {
978            // Single package mode
979            let name = name.unwrap_or_else(|| {
980                url.trim_end_matches('/')
981                    .rsplit('/')
982                    .next()
983                    .unwrap_or("unknown")
984                    .trim_end_matches(".git")
985                    .to_string()
986            });
987
988            let dest = ContainedPath::child(&pkg_dir, &name)?;
989            if dest.as_ref().exists() {
990                return Err(format!(
991                    "Package '{name}' already exists at {}. Remove it first.",
992                    dest.as_ref().display()
993                ));
994            }
995
996            copy_dir(staging.path(), dest.as_ref())
997                .map_err(|e| format!("Failed to copy package: {e}"))?;
998
999            Ok(serde_json::json!({
1000                "installed": [name],
1001                "mode": "single",
1002            })
1003            .to_string())
1004        } else {
1005            // Collection mode: scan for subdirs containing init.lua
1006            if name.is_some() {
1007                // name parameter is only meaningful for single-package repos
1008                return Err(
1009                    "The 'name' parameter is only supported for single-package repos (init.lua at root). \
1010                     This repository is a collection (subdirs with init.lua)."
1011                        .to_string(),
1012                );
1013            }
1014
1015            let mut installed = Vec::new();
1016            let mut skipped = Vec::new();
1017
1018            let entries = std::fs::read_dir(staging.path())
1019                .map_err(|e| format!("Failed to read staging dir: {e}"))?;
1020
1021            for entry in entries {
1022                let entry = entry.map_err(|e| format!("Failed to read entry: {e}"))?;
1023                let path = entry.path();
1024                if !path.is_dir() {
1025                    continue;
1026                }
1027                if !path.join("init.lua").exists() {
1028                    continue;
1029                }
1030                let pkg_name = entry.file_name().to_string_lossy().to_string();
1031                let dest = pkg_dir.join(&pkg_name);
1032                if dest.exists() {
1033                    skipped.push(pkg_name);
1034                    continue;
1035                }
1036                copy_dir(&path, &dest)
1037                    .map_err(|e| format!("Failed to copy package '{pkg_name}': {e}"))?;
1038                installed.push(pkg_name);
1039            }
1040
1041            if installed.is_empty() && skipped.is_empty() {
1042                return Err(
1043                    "No packages found. Expected init.lua at root (single) or */init.lua (collection)."
1044                        .to_string(),
1045                );
1046            }
1047
1048            Ok(serde_json::json!({
1049                "installed": installed,
1050                "skipped": skipped,
1051                "mode": "collection",
1052            })
1053            .to_string())
1054        }
1055    }
1056
1057    /// Install from a local directory path (supports dirty/uncommitted files).
1058    fn install_from_local_path(
1059        &self,
1060        source: &Path,
1061        pkg_dir: &Path,
1062        name: Option<String>,
1063    ) -> Result<String, String> {
1064        if source.join("init.lua").exists() {
1065            // Single package
1066            let name = name.unwrap_or_else(|| {
1067                source
1068                    .file_name()
1069                    .map(|n| n.to_string_lossy().to_string())
1070                    .unwrap_or_else(|| "unknown".to_string())
1071            });
1072
1073            let dest = ContainedPath::child(pkg_dir, &name)?;
1074            if dest.as_ref().exists() {
1075                // Overwrite for local installs (dev workflow)
1076                let _ = std::fs::remove_dir_all(&dest);
1077            }
1078
1079            copy_dir(source, dest.as_ref()).map_err(|e| format!("Failed to copy package: {e}"))?;
1080            // Remove .git if copied
1081            let _ = std::fs::remove_dir_all(dest.as_ref().join(".git"));
1082
1083            Ok(serde_json::json!({
1084                "installed": [name],
1085                "mode": "local_single",
1086            })
1087            .to_string())
1088        } else {
1089            // Collection mode
1090            if name.is_some() {
1091                return Err(
1092                    "The 'name' parameter is only supported for single-package dirs (init.lua at root)."
1093                        .to_string(),
1094                );
1095            }
1096
1097            let mut installed = Vec::new();
1098            let mut updated = Vec::new();
1099
1100            let entries =
1101                std::fs::read_dir(source).map_err(|e| format!("Failed to read source dir: {e}"))?;
1102
1103            for entry in entries {
1104                let entry = entry.map_err(|e| format!("Failed to read entry: {e}"))?;
1105                let path = entry.path();
1106                if !path.is_dir() || !path.join("init.lua").exists() {
1107                    continue;
1108                }
1109                let pkg_name = entry.file_name().to_string_lossy().to_string();
1110                let dest = pkg_dir.join(&pkg_name);
1111                let existed = dest.exists();
1112                if existed {
1113                    let _ = std::fs::remove_dir_all(&dest);
1114                }
1115                copy_dir(&path, &dest)
1116                    .map_err(|e| format!("Failed to copy package '{pkg_name}': {e}"))?;
1117                let _ = std::fs::remove_dir_all(dest.join(".git"));
1118                if existed {
1119                    updated.push(pkg_name);
1120                } else {
1121                    installed.push(pkg_name);
1122                }
1123            }
1124
1125            if installed.is_empty() && updated.is_empty() {
1126                return Err(
1127                    "No packages found. Expected init.lua at root (single) or */init.lua (collection)."
1128                        .to_string(),
1129                );
1130            }
1131
1132            Ok(serde_json::json!({
1133                "installed": installed,
1134                "updated": updated,
1135                "mode": "local_collection",
1136            })
1137            .to_string())
1138        }
1139    }
1140
1141    /// Remove an installed package.
1142    pub async fn pkg_remove(&self, name: &str) -> Result<String, String> {
1143        let pkg_dir = packages_dir()?;
1144        let dest = ContainedPath::child(&pkg_dir, name)?;
1145
1146        if !dest.as_ref().exists() {
1147            return Err(format!("Package '{name}' not found"));
1148        }
1149
1150        std::fs::remove_dir_all(&dest).map_err(|e| format!("Failed to remove '{name}': {e}"))?;
1151
1152        Ok(serde_json::json!({ "removed": name }).to_string())
1153    }
1154
1155    // ─── Logging ─────────────────────────────────────────────
1156
1157    /// Append a note to a session's log file.
1158    pub async fn add_note(
1159        &self,
1160        session_id: &str,
1161        content: &str,
1162        title: Option<&str>,
1163    ) -> Result<String, String> {
1164        let count = append_note(&self.log_config.dir, session_id, content, title)?;
1165        Ok(serde_json::json!({
1166            "session_id": session_id,
1167            "notes_count": count,
1168        })
1169        .to_string())
1170    }
1171
1172    /// View session logs.
1173    pub async fn log_view(
1174        &self,
1175        session_id: Option<&str>,
1176        limit: Option<usize>,
1177    ) -> Result<String, String> {
1178        match session_id {
1179            Some(sid) => self.log_read(sid),
1180            None => self.log_list(limit.unwrap_or(50)),
1181        }
1182    }
1183
1184    fn log_read(&self, session_id: &str) -> Result<String, String> {
1185        let path = ContainedPath::child(&self.log_config.dir, &format!("{session_id}.json"))?;
1186        if !path.as_ref().exists() {
1187            return Err(format!("Log file not found for session '{session_id}'"));
1188        }
1189        std::fs::read_to_string(&path).map_err(|e| format!("Failed to read log: {e}"))
1190    }
1191
1192    fn log_list(&self, limit: usize) -> Result<String, String> {
1193        let dir = &self.log_config.dir;
1194        if !dir.is_dir() {
1195            return Ok(serde_json::json!({ "sessions": [] }).to_string());
1196        }
1197
1198        let entries = std::fs::read_dir(dir).map_err(|e| format!("Failed to read log dir: {e}"))?;
1199
1200        // Collect .meta.json files first; fall back to .json for legacy logs
1201        let mut files: Vec<(std::path::PathBuf, std::time::SystemTime)> = entries
1202            .flatten()
1203            .filter_map(|entry| {
1204                let path = entry.path();
1205                let name = path.file_name()?.to_str()?;
1206                // Skip non-json and meta files in this pass
1207                if !name.ends_with(".json") || name.ends_with(".meta.json") {
1208                    return None;
1209                }
1210                let mtime = entry.metadata().ok()?.modified().ok()?;
1211                Some((path, mtime))
1212            })
1213            .collect();
1214
1215        // Sort by modification time descending (newest first), take limit
1216        files.sort_by(|a, b| b.1.cmp(&a.1));
1217        files.truncate(limit);
1218
1219        let mut sessions = Vec::new();
1220        for (path, _) in &files {
1221            // Try .meta.json first (lightweight), fall back to full log
1222            let meta_path = path.with_extension("meta.json");
1223            let doc: serde_json::Value = if meta_path.exists() {
1224                // Meta file: already flat summary (~200 bytes)
1225                match std::fs::read_to_string(&meta_path)
1226                    .ok()
1227                    .and_then(|r| serde_json::from_str(&r).ok())
1228                {
1229                    Some(d) => d,
1230                    None => continue,
1231                }
1232            } else {
1233                // Legacy fallback: read full log and extract fields
1234                let raw = match std::fs::read_to_string(path) {
1235                    Ok(r) => r,
1236                    Err(_) => continue,
1237                };
1238                match serde_json::from_str::<serde_json::Value>(&raw) {
1239                    Ok(d) => {
1240                        let stats = d.get("stats");
1241                        serde_json::json!({
1242                            "session_id": d.get("session_id").and_then(|v| v.as_str()).unwrap_or("unknown"),
1243                            "task_hint": d.get("task_hint").and_then(|v| v.as_str()),
1244                            "elapsed_ms": stats.and_then(|s| s.get("elapsed_ms")),
1245                            "rounds": stats.and_then(|s| s.get("rounds")),
1246                            "llm_calls": stats.and_then(|s| s.get("llm_calls")),
1247                            "notes_count": d.get("notes").and_then(|v| v.as_array()).map(|a| a.len()).unwrap_or(0),
1248                        })
1249                    }
1250                    Err(_) => continue,
1251                }
1252            };
1253
1254            sessions.push(doc);
1255        }
1256
1257        Ok(serde_json::json!({ "sessions": sessions }).to_string())
1258    }
1259
1260    // ─── Internal ───────────────────────────────────────────────
1261
1262    /// Install all bundled sources (collections + single packages).
1263    async fn auto_install_bundled_packages(&self) -> Result<(), String> {
1264        let mut errors: Vec<String> = Vec::new();
1265        for url in AUTO_INSTALL_SOURCES {
1266            tracing::info!("auto-installing from {url}");
1267            if let Err(e) = self.pkg_install(url.to_string(), None).await {
1268                tracing::warn!("failed to auto-install from {url}: {e}");
1269                errors.push(format!("{url}: {e}"));
1270            }
1271        }
1272        // Fail only if ALL sources failed
1273        if errors.len() == AUTO_INSTALL_SOURCES.len() {
1274            return Err(format!(
1275                "Failed to auto-install bundled packages: {}",
1276                errors.join("; ")
1277            ));
1278        }
1279        Ok(())
1280    }
1281
1282    fn maybe_log_transcript(&self, result: &FeedResult, session_id: &str) {
1283        if let FeedResult::Finished(exec_result) = result {
1284            write_transcript_log(&self.log_config, session_id, &exec_result.metrics);
1285        }
1286    }
1287
1288    /// If this session was an eval, save the final result to the eval store.
1289    fn maybe_save_eval(&self, result: &FeedResult, session_id: &str, result_json: &str) {
1290        if !matches!(result, FeedResult::Finished(_)) {
1291            return;
1292        }
1293        let strategy = {
1294            let mut map = match self.eval_sessions.lock() {
1295                Ok(m) => m,
1296                Err(_) => return,
1297            };
1298            map.remove(session_id)
1299        };
1300        if let Some(strategy) = strategy {
1301            save_eval_result(&strategy, result_json);
1302        }
1303    }
1304
1305    async fn start_and_tick(&self, code: String, ctx: serde_json::Value) -> Result<String, String> {
1306        let session = self.executor.start_session(code, ctx).await?;
1307        let (session_id, result) = self
1308            .registry
1309            .start_execution(session)
1310            .await
1311            .map_err(|e| format!("Execution failed: {e}"))?;
1312        self.maybe_log_transcript(&result, &session_id);
1313        Ok(result.to_json(&session_id).to_string())
1314    }
1315}
1316
1317#[cfg(test)]
1318mod tests {
1319    use super::*;
1320    use algocline_core::ExecutionObserver;
1321    use std::io::Write;
1322
1323    // ─── resolve_code tests ───
1324
/// Inline code is handed back unchanged when no file is given.
#[test]
fn resolve_code_inline() {
    let resolved = resolve_code(Some("return 1".into()), None).unwrap();
    assert_eq!(resolved, "return 1");
}
1330
/// Code is read from the given file when no inline code is supplied.
#[test]
fn resolve_code_from_file() {
    let mut script = tempfile::NamedTempFile::new().unwrap();
    script.write_all(b"return 42").unwrap();

    let resolved = resolve_code(None, Some(script.path().to_string_lossy().into())).unwrap();
    assert_eq!(resolved, "return 42");
}
1339
/// Supplying inline code and a file at the same time is rejected.
#[test]
fn resolve_code_both_provided_error() {
    let err = resolve_code(Some("code".into()), Some("file.lua".into())).unwrap_err();
    assert!(err.contains("not both"), "error: {err}");
}
1346
/// Supplying neither inline code nor a file is rejected.
#[test]
fn resolve_code_neither_provided_error() {
    let err = resolve_code(None, None).unwrap_err();
    assert!(err.contains("must be provided"), "error: {err}");
}
1353
/// A file path that does not exist yields an error.
#[test]
fn resolve_code_nonexistent_file_error() {
    let missing_file = Some("/tmp/algocline_nonexistent_test_file.lua".into());
    assert!(resolve_code(None, missing_file).is_err());
}
1362
1363    // ─── make_require_code tests ───
1364
/// The generated snippet requires the package and calls `pkg.run(ctx)`.
#[test]
fn make_require_code_basic() {
    let generated = make_require_code("ucb");
    for needle in [r#"require("ucb")"#, "pkg.run(ctx)"] {
        assert!(generated.contains(needle), "code: {generated}");
    }
}
1371
/// The package name is carried into the `require` call for several names.
#[test]
fn make_require_code_different_names() {
    let names = ["panel", "cot", "sc", "cove", "reflect", "calibrate"];
    for name in names {
        let code = make_require_code(name);
        let expected = format!(r#"require("{name}")"#);
        assert!(code.contains(&expected), "code for {name}: {code}");
    }
}
1382
1383    // ─── packages_dir tests ───
1384
/// The packages directory ends in `.algocline/packages`.
#[test]
fn packages_dir_ends_with_expected_path() {
    let resolved = packages_dir().unwrap();
    let has_expected_tail = resolved.ends_with(".algocline/packages");
    assert!(has_expected_tail, "dir: {}", resolved.display());
}
1394
1395    // ─── append_note tests ───
1396
/// Notes are appended in order to an existing log and the running count is
/// returned after each append.
#[test]
fn append_note_to_existing_log() {
    let tmp = tempfile::tempdir().unwrap();
    let session_id = "s-test-001";
    let log_path = tmp.path().join(format!("{session_id}.json"));

    // Seed a log file that has no notes yet.
    let seed = serde_json::json!({
        "session_id": session_id,
        "stats": { "elapsed_ms": 100 },
        "transcript": [],
    });
    std::fs::write(&log_path, serde_json::to_string_pretty(&seed).unwrap()).unwrap();

    let after_first =
        append_note(tmp.path(), session_id, "Step 2 was weak", Some("Step 2")).unwrap();
    assert_eq!(after_first, 1);

    let after_second = append_note(tmp.path(), session_id, "Overall good", None).unwrap();
    assert_eq!(after_second, 2);

    let stored: serde_json::Value =
        serde_json::from_str(&std::fs::read_to_string(&log_path).unwrap()).unwrap();
    let notes = stored["notes"].as_array().unwrap();
    assert_eq!(notes.len(), 2);

    let (first, second) = (&notes[0], &notes[1]);
    assert_eq!(first["content"], "Step 2 was weak");
    assert_eq!(first["title"], "Step 2");
    assert_eq!(second["content"], "Overall good");
    assert!(second["title"].is_null());
    assert!(first["timestamp"].is_number());
}
1425
/// Appending to a session without a log file reports "not found".
#[test]
fn append_note_missing_log_returns_error() {
    let tmp = tempfile::tempdir().unwrap();
    let err = append_note(tmp.path(), "s-nonexistent", "note", None).unwrap_err();
    assert!(err.contains("not found"));
}
1433
1434    // ─── log_list / log_view tests ───
1435
/// Only `.json` files in a log directory count as session logs.
///
/// NOTE(review): this test scans the directory itself; it does not call
/// `log_list`.
#[test]
fn log_list_from_dir() {
    let tmp = tempfile::tempdir().unwrap();

    // Two session logs ...
    let fixtures = [
        (
            "s-001.json",
            serde_json::json!({
                "session_id": "s-001",
                "task_hint": "What is 2+2?",
                "stats": { "elapsed_ms": 100, "rounds": 1, "llm_calls": 1 },
                "transcript": [{ "prompt": "What is 2+2?", "response": "4" }],
            }),
        ),
        (
            "s-002.json",
            serde_json::json!({
                "session_id": "s-002",
                "task_hint": "Explain ownership",
                "stats": { "elapsed_ms": 5000, "rounds": 3, "llm_calls": 3 },
                "transcript": [],
                "notes": [{ "timestamp": 0, "content": "good" }],
            }),
        ),
    ];
    for (file_name, log) in &fixtures {
        std::fs::write(
            tmp.path().join(file_name),
            serde_json::to_string(log).unwrap(),
        )
        .unwrap();
    }
    // ... and one file that must not be counted.
    std::fs::write(tmp.path().join("README.txt"), "ignore me").unwrap();

    let config = TranscriptConfig {
        dir: tmp.path().to_path_buf(),
        enabled: true,
    };

    let json_files = std::fs::read_dir(&config.dir)
        .unwrap()
        .flatten()
        .filter(|entry| entry.path().extension().and_then(|ext| ext.to_str()) == Some("json"))
        .count();
    assert_eq!(json_files, 2);
}
1483
1484    // ─── ContainedPath tests ───
1485
/// A plain file name is accepted and kept as the final path component.
#[test]
fn contained_path_accepts_simple_name() {
    let base = tempfile::tempdir().unwrap();
    let child = ContainedPath::child(base.path(), "s-abc123.json");
    assert!(child.is_ok());
    let contained = child.unwrap();
    assert!(contained.as_ref().ends_with("s-abc123.json"));
}
1493
/// Leading `..` components are rejected as path traversal.
#[test]
fn contained_path_rejects_parent_traversal() {
    let base = tempfile::tempdir().unwrap();
    let attempt = ContainedPath::child(base.path(), "../../../etc/passwd");
    assert!(attempt.is_err());
    let err = attempt.unwrap_err();
    assert!(err.contains("path traversal"), "err: {err}");
}
1502
/// An absolute path is rejected as path traversal.
#[test]
fn contained_path_rejects_absolute_path() {
    let base = tempfile::tempdir().unwrap();
    let attempt = ContainedPath::child(base.path(), "/etc/passwd");
    assert!(attempt.is_err());
    let err = attempt.unwrap_err();
    assert!(err.contains("path traversal"), "err: {err}");
}
1511
/// A `..` component in the middle of the name is rejected as well.
#[test]
fn contained_path_rejects_dot_dot_in_middle() {
    let base = tempfile::tempdir().unwrap();
    assert!(ContainedPath::child(base.path(), "foo/../bar").is_err());
}
1518
/// A nested relative path made only of normal components is accepted.
#[test]
fn contained_path_accepts_nested_normal() {
    let base = tempfile::tempdir().unwrap();
    assert!(ContainedPath::child(base.path(), "sub/file.json").is_ok());
}
1525
/// A session id that tries to escape the log directory is refused.
#[test]
fn append_note_rejects_traversal_session_id() {
    let tmp = tempfile::tempdir().unwrap();
    let attempt = append_note(tmp.path(), "../../../etc/passwd", "evil", None);
    assert!(attempt.is_err());
    let err = attempt.unwrap_err();
    assert!(err.contains("path traversal"));
}
1533
1534    // ─── meta file tests ───
1535
/// Writing a transcript log produces the main log plus a `.meta.json`
/// summary that carries the stats fields but no transcript.
#[test]
fn write_transcript_log_creates_meta_file() {
    let tmp = tempfile::tempdir().unwrap();
    let config = TranscriptConfig {
        dir: tmp.path().to_path_buf(),
        enabled: true,
    };

    // Record one prompt/response round on the metrics observer.
    let metrics = algocline_core::ExecutionMetrics::new();
    let observer = metrics.create_observer();
    let query = algocline_core::LlmQuery {
        id: algocline_core::QueryId::single(),
        prompt: "What is 2+2?".into(),
        system: None,
        max_tokens: 100,
        grounded: false,
        underspecified: false,
    };
    observer.on_paused(&[query]);
    observer.on_response_fed(&algocline_core::QueryId::single(), "4");
    observer.on_resumed();
    observer.on_completed(&serde_json::json!(null));

    write_transcript_log(&config, "s-meta-test", &metrics);

    // Both the main log and the meta file must have been written.
    let main_log = tmp.path().join("s-meta-test.json");
    let meta_path = tmp.path().join("s-meta-test.meta.json");
    assert!(main_log.exists());
    assert!(meta_path.exists());

    let meta: serde_json::Value =
        serde_json::from_str(&std::fs::read_to_string(&meta_path).unwrap()).unwrap();
    assert_eq!(meta["session_id"], "s-meta-test");
    assert_eq!(meta["notes_count"], 0);
    for key in ["elapsed_ms", "rounds", "llm_calls"] {
        assert!(meta.get(key).is_some());
    }
    // The summary must not embed the transcript itself.
    assert!(meta.get("transcript").is_none());
}
1577
/// Each appended note bumps `notes_count` in the session's meta file.
#[test]
fn append_note_updates_meta_notes_count() {
    let tmp = tempfile::tempdir().unwrap();
    let session_id = "s-meta-note";
    let log_path = tmp.path().join(format!("{session_id}.json"));
    let meta_path = tmp.path().join(format!("{session_id}.meta.json"));

    // Main log without notes.
    let log = serde_json::json!({
        "session_id": session_id,
        "stats": { "elapsed_ms": 100 },
        "transcript": [],
    });
    std::fs::write(&log_path, serde_json::to_string_pretty(&log).unwrap()).unwrap();

    // Matching meta summary that starts at zero notes.
    let meta = serde_json::json!({
        "session_id": session_id,
        "task_hint": "test",
        "elapsed_ms": 100,
        "rounds": 1,
        "llm_calls": 1,
        "notes_count": 0,
    });
    std::fs::write(&meta_path, serde_json::to_string(&meta).unwrap()).unwrap();

    // Re-reads the meta file from disk on every call.
    let stored_notes_count = || -> serde_json::Value {
        let raw = std::fs::read_to_string(&meta_path).unwrap();
        let current: serde_json::Value = serde_json::from_str(&raw).unwrap();
        current["notes_count"].clone()
    };

    append_note(tmp.path(), session_id, "first note", None).unwrap();
    assert_eq!(stored_notes_count(), 1);

    append_note(tmp.path(), session_id, "second note", None).unwrap();
    assert_eq!(stored_notes_count(), 2);
}
1624
1625    // ─── TranscriptConfig tests ───
1626
/// A config constructed with `enabled: true` reports itself as enabled.
///
/// NOTE(review): the config is built by hand; `from_env` is not exercised.
#[test]
fn transcript_config_default_enabled() {
    let dir = PathBuf::from("/tmp/test");
    let config = TranscriptConfig { dir, enabled: true };
    assert!(config.enabled);
}
1636
/// With logging disabled, neither the main log nor the meta file is written
/// even though the transcript is non-empty.
#[test]
fn write_transcript_log_disabled_is_noop() {
    let tmp = tempfile::tempdir().unwrap();
    let config = TranscriptConfig {
        dir: tmp.path().to_path_buf(),
        enabled: false,
    };

    // Record one prompt/response round so there would be something to log.
    let metrics = algocline_core::ExecutionMetrics::new();
    let observer = metrics.create_observer();
    let query = algocline_core::LlmQuery {
        id: algocline_core::QueryId::single(),
        prompt: "test".into(),
        system: None,
        max_tokens: 10,
        grounded: false,
        underspecified: false,
    };
    observer.on_paused(&[query]);
    observer.on_response_fed(&algocline_core::QueryId::single(), "r");
    observer.on_resumed();
    observer.on_completed(&serde_json::json!(null));

    write_transcript_log(&config, "s-disabled", &metrics);

    for file_name in ["s-disabled.json", "s-disabled.meta.json"] {
        assert!(!tmp.path().join(file_name).exists());
    }
}
1664
/// Metrics that never saw an observer event have an empty transcript, so
/// no log file is written.
#[test]
fn write_transcript_log_empty_transcript_is_noop() {
    let tmp = tempfile::tempdir().unwrap();
    let config = TranscriptConfig {
        dir: tmp.path().to_path_buf(),
        enabled: true,
    };

    let untouched_metrics = algocline_core::ExecutionMetrics::new();
    write_transcript_log(&config, "s-empty", &untouched_metrics);

    let log_path = tmp.path().join("s-empty.json");
    assert!(!log_path.exists());
}
1677
1678    // ─── copy_dir tests ───
1679
1680    #[test]
1681    fn copy_dir_basic() {
1682        let src = tempfile::tempdir().unwrap();
1683        let dst = tempfile::tempdir().unwrap();
1684
1685        std::fs::write(src.path().join("a.txt"), "hello").unwrap();
1686        std::fs::create_dir(src.path().join("sub")).unwrap();
1687        std::fs::write(src.path().join("sub/b.txt"), "world").unwrap();
1688
1689        let dst_path = dst.path().join("copied");
1690        copy_dir(src.path(), &dst_path).unwrap();
1691
1692        assert_eq!(
1693            std::fs::read_to_string(dst_path.join("a.txt")).unwrap(),
1694            "hello"
1695        );
1696        assert_eq!(
1697            std::fs::read_to_string(dst_path.join("sub/b.txt")).unwrap(),
1698            "world"
1699        );
1700    }
1701
1702    #[test]
1703    fn copy_dir_empty() {
1704        let src = tempfile::tempdir().unwrap();
1705        let dst = tempfile::tempdir().unwrap();
1706        let dst_path = dst.path().join("empty_copy");
1707        copy_dir(src.path(), &dst_path).unwrap();
1708        assert!(dst_path.exists());
1709        assert!(dst_path.is_dir());
1710    }
1711
1712    // ─── task_hint truncation in write_transcript_log ───
1713
1714    #[test]
1715    fn write_transcript_log_truncates_long_prompt() {
1716        let dir = tempfile::tempdir().unwrap();
1717        let config = TranscriptConfig {
1718            dir: dir.path().to_path_buf(),
1719            enabled: true,
1720        };
1721        let metrics = algocline_core::ExecutionMetrics::new();
1722        let observer = metrics.create_observer();
1723        let long_prompt = "x".repeat(300);
1724        observer.on_paused(&[algocline_core::LlmQuery {
1725            id: algocline_core::QueryId::single(),
1726            prompt: long_prompt,
1727            system: None,
1728            max_tokens: 10,
1729            grounded: false,
1730            underspecified: false,
1731        }]);
1732        observer.on_response_fed(&algocline_core::QueryId::single(), "r");
1733        observer.on_resumed();
1734        observer.on_completed(&serde_json::json!(null));
1735
1736        write_transcript_log(&config, "s-long", &metrics);
1737
1738        let raw = std::fs::read_to_string(dir.path().join("s-long.json")).unwrap();
1739        let doc: serde_json::Value = serde_json::from_str(&raw).unwrap();
1740        let hint = doc["task_hint"].as_str().unwrap();
1741        // Should be truncated to ~100 chars + "..."
1742        assert!(hint.len() <= 104, "hint too long: {} chars", hint.len());
1743        assert!(hint.ends_with("..."));
1744    }
1745
1746    #[test]
1747    fn log_list_prefers_meta_file() {
1748        let dir = tempfile::tempdir().unwrap();
1749
1750        // Create a full log (large, with transcript)
1751        let log = serde_json::json!({
1752            "session_id": "s-big",
1753            "task_hint": "full log hint",
1754            "stats": { "elapsed_ms": 999, "rounds": 5, "llm_calls": 5 },
1755            "transcript": [{"prompt": "x".repeat(10000), "response": "y".repeat(10000)}],
1756        });
1757        std::fs::write(
1758            dir.path().join("s-big.json"),
1759            serde_json::to_string(&log).unwrap(),
1760        )
1761        .unwrap();
1762
1763        // Create corresponding meta
1764        let meta = serde_json::json!({
1765            "session_id": "s-big",
1766            "task_hint": "full log hint",
1767            "elapsed_ms": 999,
1768            "rounds": 5,
1769            "llm_calls": 5,
1770            "notes_count": 0,
1771        });
1772        std::fs::write(
1773            dir.path().join("s-big.meta.json"),
1774            serde_json::to_string(&meta).unwrap(),
1775        )
1776        .unwrap();
1777
1778        // Create a legacy log (no meta file)
1779        let legacy = serde_json::json!({
1780            "session_id": "s-legacy",
1781            "task_hint": "legacy hint",
1782            "stats": { "elapsed_ms": 100, "rounds": 1, "llm_calls": 1 },
1783            "transcript": [],
1784        });
1785        std::fs::write(
1786            dir.path().join("s-legacy.json"),
1787            serde_json::to_string(&legacy).unwrap(),
1788        )
1789        .unwrap();
1790
1791        let config = TranscriptConfig {
1792            dir: dir.path().to_path_buf(),
1793            enabled: true,
1794        };
1795        let app = AppService {
1796            executor: Arc::new(
1797                tokio::runtime::Builder::new_current_thread()
1798                    .build()
1799                    .unwrap()
1800                    .block_on(async { algocline_engine::Executor::new(vec![]).await.unwrap() }),
1801            ),
1802            registry: Arc::new(algocline_engine::SessionRegistry::new()),
1803            log_config: config,
1804            eval_sessions: Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())),
1805        };
1806
1807        let result = app.log_list(50).unwrap();
1808        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
1809        let sessions = parsed["sessions"].as_array().unwrap();
1810
1811        assert_eq!(sessions.len(), 2);
1812
1813        // Both sessions should have session_id and task_hint
1814        let ids: Vec<&str> = sessions
1815            .iter()
1816            .map(|s| s["session_id"].as_str().unwrap())
1817            .collect();
1818        assert!(ids.contains(&"s-big"));
1819        assert!(ids.contains(&"s-legacy"));
1820    }
1821}
1822
/// Property-based tests for the path/code helpers, followed by plain unit
/// tests for the eval entry point and the comparison helpers.
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        /// `resolve_code` returns normally for any mix of optional inline code
        /// and optional file name; the result itself is ignored.
        #[test]
        fn resolve_code_never_panics(
            code in proptest::option::of("[a-z]{0,50}"),
            file in proptest::option::of("[a-z]{0,50}"),
        ) {
            drop(resolve_code(code, file));
        }

        /// A name containing a `..` component is always refused by `ContainedPath::child`.
        #[test]
        fn contained_path_rejects_traversal(
            prefix in "[a-z]{0,5}",
            suffix in "[a-z]{0,5}",
        ) {
            let root = tempfile::tempdir().unwrap();
            let candidate = format!("{prefix}/../{suffix}");
            prop_assert!(ContainedPath::child(root.path(), &candidate).is_err());
        }

        /// Plain `*.json` file names made of lowercase letters, digits, `_` and `-`
        /// are accepted by `ContainedPath::child`.
        #[test]
        fn contained_path_accepts_simple_names(name in "[a-z][a-z0-9_-]{0,20}\\.json") {
            let root = tempfile::tempdir().unwrap();
            prop_assert!(ContainedPath::child(root.path(), &name).is_ok());
        }

        /// The code produced by `make_require_code` contains a `require("<name>")`
        /// call for the strategy and the `pkg.run(ctx)` invocation.
        #[test]
        fn make_require_code_contains_name(name in "[a-z_]{1,20}") {
            let generated = make_require_code(&name);
            let require_call = format!("require(\"{}\")", name);
            prop_assert!(generated.contains(&require_call));
            prop_assert!(generated.contains("pkg.run(ctx)"));
        }

        /// A file copied through `copy_dir` reads back with exactly the generated content.
        #[test]
        fn copy_dir_preserves_content(content in "[a-zA-Z0-9 ]{1,200}") {
            let source = tempfile::tempdir().unwrap();
            let scratch = tempfile::tempdir().unwrap();
            let target = scratch.path().join("out");

            std::fs::write(source.path().join("test.txt"), &content).unwrap();
            copy_dir(source.path(), &target).unwrap();

            let copied = std::fs::read_to_string(target.join("test.txt")).unwrap();
            prop_assert_eq!(&copied, &content);
        }
    }

    // ─── eval tests ───

    /// `resolve_code` with neither inline code nor a file is an error.
    #[test]
    fn eval_rejects_no_scenario() {
        assert!(resolve_code(None, None).is_err());
    }

    /// When `evalframe` is not installed, `eval` against an empty package
    /// directory fails with an error mentioning the bundled install or
    /// `evalframe` itself.
    #[test]
    fn eval_auto_installs_evalframe_on_missing() {
        // Nothing to check when evalframe is already installed globally.
        if is_package_installed("evalframe") {
            return;
        }

        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .unwrap();

        // Point the executor at an empty package directory.
        let tmp = tempfile::tempdir().unwrap();
        let empty_packages = tmp.path().join("empty_packages");
        std::fs::create_dir_all(&empty_packages).unwrap();

        let executor = Arc::new(runtime.block_on(async {
            algocline_engine::Executor::new(vec![empty_packages])
                .await
                .unwrap()
        }));
        let service = AppService::new(
            executor,
            TranscriptConfig {
                dir: tmp.path().join("logs"),
                enabled: false,
            },
        );

        let scenario = r#"return { cases = {} }"#;
        let outcome = runtime.block_on(service.eval(Some(scenario.into()), None, "cove", None));
        assert!(outcome.is_err());

        // Auto-install is attempted first; the error is about the bundled install
        // failing (git clone) or evalframe still being missing afterwards.
        let err = outcome.unwrap_err();
        assert!(
            ["bundled", "evalframe"]
                .iter()
                .any(|needle| err.contains(*needle)),
            "unexpected error: {err}"
        );
    }

    // ─── comparison helper tests ───

    /// `extract_strategy_from_id` yields the part before the trailing
    /// `_<timestamp>` and `None` when the id has no such suffix.
    #[test]
    fn extract_strategy_from_id_splits_correctly() {
        let cases = [
            ("cove_1710672000", Some("cove")),
            ("my_strat_1710672000", Some("my_strat")),
            ("nostamp", None),
        ];
        for (id, expected) in cases {
            assert_eq!(extract_strategy_from_id(id), expected);
        }
    }

    /// A comparison-result file name resolved through `ContainedPath::child`
    /// can be written and read back unchanged.
    #[test]
    fn save_compare_result_persists_file() {
        let tmp = tempfile::tempdir().unwrap();
        let evals_root = tmp.path().join(".algocline").join("evals");
        std::fs::create_dir_all(&evals_root).unwrap();

        // save_compare_result uses evals_dir() which reads HOME, so the
        // ContainedPath + write logic is exercised directly instead.
        let payload = r#"{"test": true}"#;
        let target = ContainedPath::child(&evals_root, "compare_a_1_vs_b_2.json").unwrap();
        std::fs::write(&*target, payload).unwrap();

        let round_trip = std::fs::read_to_string(&*target).unwrap();
        assert_eq!(round_trip, payload);
    }
}