Skip to main content

amql_engine/
bench.rs

1//! Benchmark runner comparing AQL output size against baseline approaches.
2//!
3//! Parses `.config/aql.bench` XML configs and executes each case, measuring
4//! output bytes and estimated tokens for cat and arbitrary shell commands vs AQL.
5//! Supports auto-detection from project structure or explicit case definitions.
6
7use crate::code_cache::glob_source_files;
8use crate::error::AqlError;
9use crate::extractor::ExtractorRegistry;
10use crate::navigate;
11use crate::resolver::ResolverRegistry;
12use crate::store::AnnotationStore;
13use crate::types::{ProjectRoot, RelativePath, Scope};
14use crate::xml;
15use quick_xml::events::Event;
16use quick_xml::Reader;
17use serde::Serialize;
18use std::path::Path;
19use std::time::Instant;
20
21// ── Constants ────────────────────────────────────────────────────────────
22
/// Directories pruned from find commands and directory walks.
///
/// `find_prune_clause` turns every entry into a `find -name` test, so a
/// directory with one of these names is skipped at any depth below the search
/// root. Entries are interpolated into the shell command without quoting and
/// must therefore stay free of shell metacharacters.
const PRUNE_DIRS: &[&str] = &[
    "node_modules",
    ".git",
    "dist",
    "build",
    "target",
    "coverage",
];
32
33// ── Types ────────────────────────────────────────────────────────────────
34
/// Parsed benchmark configuration from `.config/aql.bench`.
///
/// `parse_bench_config` refuses configs without any `<case>`, so a value
/// obtained from the parser always carries at least one case.
#[derive(Debug, Clone)]
pub struct BenchConfig {
    /// Benchmark cases in the order their `<case>` elements were closed.
    pub cases: Vec<BenchCase>,
}
40
/// A single benchmark scenario comparing multiple baselines against AQL.
#[derive(Debug, Clone)]
pub struct BenchCase {
    /// Identifier for this case. The config parser neither checks uniqueness
    /// nor rejects an empty name (a missing `name` attribute becomes `""`).
    pub name: String,
    /// Human-readable description of the task. Its byte length also feeds the
    /// task-prompt token estimate when the case is run.
    pub task: String,
    /// Baseline approaches (what you'd do without AQL).
    pub baselines: Vec<BaselineDef>,
    /// AQL approach.
    pub aql: AqlDef,
}
53
/// Baseline approach definition.
///
/// Each variant knows how to generate a shell command that gets executed
/// via `sh -c`. Output bytes are measured from stdout.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum BaselineDef {
    /// Read all matching files verbatim.
    ///
    /// `path` is joined onto the project root; when it names a regular file
    /// that file is read directly, otherwise it is searched with `find`.
    /// `globs` are file-name patterns passed to `find -name`; an empty list
    /// selects every file under `path`.
    Cat { path: String, globs: Vec<String> },
    /// Arbitrary shell command. Users configure comparison tools (grep, ast-grep, etc.)
    /// via `Command` in `.config/aql.bench`; the engine has no opinion on which tools exist.
    ///
    /// `name` is the label reported for the measurement and `cmd` is handed to
    /// the shell unchanged.
    Command { name: String, cmd: String },
}
67
68impl BaselineDef {
69    /// Generate the shell command string for this baseline.
70    ///
71    /// The command is designed to be run via `sh -c` with stdout captured.
72    /// Commands use `find | xargs` for directory targets with globs.
73    fn to_shell_command(&self, project_root: &Path) -> (String, String) {
74        match self {
75            BaselineDef::Cat { path, globs } => {
76                let target = project_root.join(path);
77                let cmd = if target.is_file() {
78                    format!("cat {}", shell_escape(&target.to_string_lossy()))
79                } else if globs.is_empty() {
80                    format!(
81                        "find {} {} -type f -print0 | xargs -0 cat",
82                        shell_escape(&target.to_string_lossy()),
83                        find_prune_clause(),
84                    )
85                } else {
86                    let name_args = globs_to_find_names(globs);
87                    format!(
88                        "find {} {} -type f \\( {} \\) -print0 | xargs -0 cat",
89                        shell_escape(&target.to_string_lossy()),
90                        find_prune_clause(),
91                        name_args,
92                    )
93                };
94                ("cat".to_string(), cmd)
95            }
96            BaselineDef::Command { name, cmd } => (name.clone(), cmd.clone()),
97        }
98    }
99}
100
101/// Build the `find` prune clause from `PRUNE_DIRS`.
102fn find_prune_clause() -> String {
103    let inner: Vec<String> = PRUNE_DIRS.iter().map(|d| format!("-name {d}")).collect();
104    format!("\\( {} \\) -prune -o", inner.join(" -o "))
105}
106
107/// Convert glob patterns to `find -name` arguments.
108fn globs_to_find_names(globs: &[String]) -> String {
109    globs
110        .iter()
111        .enumerate()
112        .map(|(i, g)| {
113            let prefix = if i > 0 { "-o " } else { "" };
114            format!("{prefix}-name {}", shell_escape(g))
115        })
116        .collect::<Vec<_>>()
117        .join(" ")
118}
119
/// Wrap a string in single quotes for use as one POSIX shell word.
///
/// An embedded single quote cannot appear inside a single-quoted word, so each
/// one is emitted as `'\''`: close the quote, add an escaped quote, reopen.
fn shell_escape(s: &str) -> String {
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('\'');
    for ch in s.chars() {
        if ch == '\'' {
            quoted.push_str("'\\''");
        } else {
            quoted.push(ch);
        }
    }
    quoted.push('\'');
    quoted
}
128
/// AQL approach definition.
///
/// AQL approaches are executed in-process; the measured size is the length of
/// the JSON-serialized result.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum AqlDef {
    /// Run a built-in extractor.
    ///
    /// `extractor` is the registry name to look up; `path` is the scope whose
    /// source files are fed to it.
    Extract { extractor: String, path: String },
    /// Run tree-sitter node selection.
    ///
    /// `path` is a file relative to the project root, or several such files
    /// separated by newlines. `selector` is a comma-separated list of kinds;
    /// each kind is selected separately and the output sizes are summed.
    NavSelect { path: String, selector: String },
    /// Run a unified query.
    ///
    /// `scope` limits the files considered and may be empty.
    Query { selector: String, scope: String },
}
140
/// Result of running a single benchmark case.
#[non_exhaustive]
#[derive(Debug, Clone, Serialize)]
pub struct BenchResult {
    /// Case name, copied from the benchmark case.
    pub name: String,
    /// Task description, copied from the benchmark case.
    pub task: String,
    /// Estimated tokens for the task prompt (shared across all approaches).
    /// Computed as the task description's byte length divided by four.
    pub task_tokens: usize,
    /// One measurement per configured baseline, in configuration order.
    pub baselines: Vec<MeasuredOutput>,
    /// Measurement of the AQL approach.
    pub aql: MeasuredOutput,
    /// Best baseline total / AQL total, where "best" is the smallest non-zero
    /// total among the baselines that ran without error. `None` when the AQL
    /// run failed, no baseline succeeded, or either total is zero.
    pub ratio: Option<f64>,
    /// `"aql"` when AQL used strictly fewer total tokens than the best
    /// baseline, `"baseline"` otherwise. When no ratio could be computed this
    /// is `"error"` (AQL failed), `"n/a"` (no baseline succeeded) or
    /// `"n/a:no-data"` (a total was zero).
    pub winner: String,
}
155
/// Measured output from one approach.
#[non_exhaustive]
#[derive(Debug, Clone, Serialize)]
pub struct MeasuredOutput {
    /// Label of the approach, serialized as `type`: `"cat"` or the configured
    /// command name for baselines; `"extract"`, `"nav-select"` or `"query"`
    /// for AQL.
    #[serde(rename = "type")]
    pub approach_type: String,
    /// Size of the produced output in bytes; zero when the run failed.
    pub bytes: usize,
    /// Tokens from tool output only.
    /// Estimated as `bytes / 4`.
    pub tokens: usize,
    /// Total tokens an agent would consume: task prompt + tool output.
    /// Left at zero by the measuring functions and filled in by the case runner.
    pub total_tokens: usize,
    /// Wall-clock duration of the measurement in milliseconds.
    pub wall_ms: u64,
    /// Set when the measurement failed (command error, extractor not found, etc.).
    /// A failed run is excluded from ratio/winner computation.
    pub error: Option<String>,
}
172
173// ── XML Parsing ──────────────────────────────────────────────────────────
174
/// Return the value of the first attribute named `key`, if any.
///
/// `pairs` holds `(name, value)` tuples in document order; when a name occurs
/// more than once the earliest occurrence wins.
fn get_attr<'a>(pairs: &'a [(String, String)], key: &str) -> Option<&'a str> {
    for (name, value) in pairs {
        if name == key {
            return Some(value.as_str());
        }
    }
    None
}
182
/// Split a comma-separated attribute value into individual glob patterns.
///
/// Surrounding whitespace is removed from every pattern and empty entries
/// (for example from a trailing comma) are dropped.
fn parse_globs(raw: &str) -> Vec<String> {
    let mut globs = Vec::new();
    for piece in raw.split(',') {
        let piece = piece.trim();
        if !piece.is_empty() {
            globs.push(piece.to_string());
        }
    }
    globs
}
190
191/// Parse a benchmark config from a raw XML string.
192#[must_use = "parsing a bench config is useless without inspecting the result"]
193pub fn parse_bench_config(raw: &str) -> Result<BenchConfig, AqlError> {
194    let mut reader = Reader::from_str(raw);
195    let mut buf = Vec::new();
196    let mut cases: Vec<BenchCase> = Vec::new();
197
198    let mut in_benchmarks = false;
199    let mut current_case: Option<PartialCase> = None;
200
201    loop {
202        match reader.read_event_into(&mut buf) {
203            Ok(Event::Eof) => break,
204            Ok(Event::Start(ref e)) => {
205                let name = xml::element_name(e)?;
206                let pairs = xml::attr_map(e)?;
207
208                match name.as_str() {
209                    "benchmarks" => {
210                        in_benchmarks = true;
211                    }
212                    "case" if in_benchmarks => {
213                        current_case = Some(PartialCase {
214                            name: get_attr(&pairs, "name").unwrap_or("").to_string(),
215                            task: get_attr(&pairs, "task").unwrap_or("").to_string(),
216                            baselines: Vec::new(),
217                            aql: None,
218                        });
219                    }
220                    _ => {}
221                }
222            }
223            Ok(Event::Empty(ref e)) => {
224                let name = xml::element_name(e)?;
225                let pairs = xml::attr_map(e)?;
226
227                if let Some(ref mut case) = current_case {
228                    match name.as_str() {
229                        "baseline" => {
230                            case.baselines.push(parse_baseline_def(&pairs)?);
231                        }
232                        "aql" => {
233                            case.aql = Some(parse_aql_def(&pairs)?);
234                        }
235                        _ => {}
236                    }
237                }
238            }
239            Ok(Event::End(ref e)) => {
240                let name = xml::end_name(e)?;
241                match name.as_str() {
242                    "benchmarks" => {
243                        in_benchmarks = false;
244                    }
245                    "case" => {
246                        if let Some(partial) = current_case.take() {
247                            cases.push(partial.finish()?);
248                        }
249                    }
250                    _ => {}
251                }
252            }
253            Err(e) => return Err(format!("Invalid XML in bench config: {e}").into()),
254            _ => {}
255        }
256        buf.clear();
257    }
258
259    if cases.is_empty() {
260        return Err("Bench config contains no <case> elements".into());
261    }
262
263    Ok(BenchConfig { cases })
264}
265
/// Intermediate state while parsing a <case> element.
///
/// Filled in as child elements are read and validated by `finish` once the
/// case is closed.
struct PartialCase {
    /// Value of the `name` attribute, or an empty string when absent.
    name: String,
    /// Value of the `task` attribute, or an empty string when absent.
    task: String,
    /// Baselines collected so far, in document order.
    baselines: Vec<BaselineDef>,
    /// The AQL definition, once an `<aql>` child has been seen.
    aql: Option<AqlDef>,
}
273
274impl PartialCase {
275    fn finish(self) -> Result<BenchCase, String> {
276        if self.baselines.is_empty() {
277            return Err(format!(
278                "Bench case '{}' missing <baseline> element",
279                self.name
280            ));
281        }
282        let aql = self
283            .aql
284            .ok_or_else(|| format!("Bench case '{}' missing <aql> element", self.name))?;
285        Ok(BenchCase {
286            name: self.name,
287            task: self.task,
288            baselines: self.baselines,
289            aql,
290        })
291    }
292}
293
294fn parse_baseline_def(pairs: &[(String, String)]) -> Result<BaselineDef, String> {
295    let baseline_type = get_attr(pairs, "type").unwrap_or("cat");
296    // Defensive: empty path defaults to project root; intentional for project-wide cat.
297    let path = get_attr(pairs, "path").unwrap_or("").trim().to_string();
298    let globs = get_attr(pairs, "globs")
299        .map(parse_globs)
300        .unwrap_or_default();
301
302    match baseline_type {
303        "cat" => Ok(BaselineDef::Cat { path, globs }),
304        "command" => {
305            let name = get_attr(pairs, "name")
306                .unwrap_or("custom")
307                .trim()
308                .to_string();
309            let cmd = get_attr(pairs, "cmd").unwrap_or("").trim().to_string();
310            if cmd.trim().is_empty() {
311                return Err(
312                    "baseline type=\"command\" requires a non-empty cmd attribute".to_string(),
313                );
314            }
315            Ok(BaselineDef::Command { name, cmd })
316        }
317        other => Err(format!("Unknown baseline type: {other}")),
318    }
319}
320
321fn parse_aql_def(pairs: &[(String, String)]) -> Result<AqlDef, String> {
322    let aql_type = get_attr(pairs, "type").unwrap_or("extract");
323    match aql_type {
324        "extract" => {
325            let extractor = get_attr(pairs, "extractor")
326                .unwrap_or("")
327                .trim()
328                .to_string();
329            let path = get_attr(pairs, "path").unwrap_or("").trim().to_string();
330            if extractor.trim().is_empty() {
331                return Err(
332                    "aql type=\"extract\" requires a non-empty extractor attribute".to_string(),
333                );
334            }
335            if path.trim().is_empty() {
336                return Err("aql type=\"extract\" requires a non-empty path attribute".to_string());
337            }
338            Ok(AqlDef::Extract { extractor, path })
339        }
340        "nav-select" => {
341            let path = get_attr(pairs, "path").unwrap_or("").trim().to_string();
342            let selector = get_attr(pairs, "selector").unwrap_or("").trim().to_string();
343            if path.trim().is_empty() {
344                return Err(
345                    "aql type=\"nav-select\" requires a non-empty path attribute".to_string(),
346                );
347            }
348            if selector.trim().is_empty() {
349                return Err(
350                    "aql type=\"nav-select\" requires a non-empty selector attribute".to_string(),
351                );
352            }
353            Ok(AqlDef::NavSelect { path, selector })
354        }
355        "query" => {
356            let selector = get_attr(pairs, "selector").unwrap_or("").trim().to_string();
357            if selector.trim().is_empty() {
358                return Err(
359                    "aql type=\"query\" requires a non-empty selector attribute".to_string()
360                );
361            }
362            Ok(AqlDef::Query {
363                selector,
364                scope: get_attr(pairs, "scope").unwrap_or("").trim().to_string(),
365            })
366        }
367        other => Err(format!("Unknown aql type: {other}")),
368    }
369}
370
371// ── Auto-detect ──────────────────────────────────────────────────────────
372
373/// Auto-generate benchmark cases by scanning the project.
374///
375/// 1. Scan for test files → extract-vs-cat case
376/// 2. Try other extractors (express, react, go-http, go-test) → extract-vs-cat case
377///
378/// Users can add comparison commands (grep, ast-grep, etc.) via `--baseline` or
379/// `.config/aql.bench` `<baseline type="command" .../>` entries.
380pub fn auto_detect_cases(
381    project_root: &ProjectRoot,
382    registry: &ExtractorRegistry,
383    resolvers: &ResolverRegistry,
384) -> Vec<BenchCase> {
385    let mut cases = Vec::new();
386
387    // Find test files via the "test" extractor
388    if let Some(test_ext) = registry.get("test") {
389        let extensions: rustc_hash::FxHashSet<&str> = test_ext
390            .extensions()
391            .iter()
392            .map(|e| e.trim_start_matches('.'))
393            .collect();
394
395        let all_files = glob_source_files(project_root, &Scope::from(""), resolvers);
396        let test_files: Vec<_> = all_files
397            .iter()
398            .filter(|f| {
399                let ext = f.extension().and_then(|e| e.to_str()).unwrap_or("");
400                let fname = f.file_name().and_then(|n| n.to_str()).unwrap_or("");
401                extensions.contains(ext)
402                    && (fname.contains(".test.")
403                        || fname.contains(".spec.")
404                        || fname.contains("_test."))
405            })
406            .collect();
407
408        if !test_files.is_empty() {
409            let test_dir = find_common_dir(&test_files, project_root);
410            let globs: Vec<String> = test_ext
411                .extensions()
412                .iter()
413                .map(|e| format!("*{e}"))
414                .collect();
415
416            cases.push(BenchCase {
417                name: "test-structure".to_string(),
418                task: "List all test suites with nesting".to_string(),
419                baselines: vec![BaselineDef::Cat {
420                    path: test_dir.clone(),
421                    globs,
422                }],
423                aql: AqlDef::Extract {
424                    extractor: "test".to_string(),
425                    path: test_dir,
426                },
427            });
428        }
429    }
430
431    // Try other extractors (express, react, go-http, go_test)
432    let extractor_names = ["express", "react", "go-http", "go_test"];
433    let all_extractor_files = glob_source_files(project_root, &Scope::from(""), resolvers);
434    for ext_name in &extractor_names {
435        if let Some(ext) = registry.get(ext_name) {
436            let all_files = &all_extractor_files;
437            let ext_set: rustc_hash::FxHashSet<&str> = ext
438                .extensions()
439                .iter()
440                .map(|e| e.trim_start_matches('.'))
441                .collect();
442
443            let mut matching: Vec<&std::path::PathBuf> = all_files
444                .iter()
445                .filter(|f| {
446                    let file_ext = f.extension().and_then(|e| e.to_str()).unwrap_or("");
447                    ext_set.contains(file_ext)
448                })
449                .collect();
450
451            if matching.is_empty() {
452                continue;
453            }
454
455            // Sort for determinism before probing
456            matching.sort();
457
458            // Probe up to 10 files to verify this extractor produces output
459            let produces_output = matching.iter().take(10).any(|sample| {
460                let rel = sample
461                    .strip_prefix(project_root.as_ref())
462                    .map(|r| RelativePath::from(r.to_string_lossy().as_ref()))
463                    .unwrap_or_else(|_| RelativePath::from(sample.to_string_lossy().as_ref()));
464                std::fs::read_to_string(sample)
465                    .map(|source| !ext.extract(&source, &rel).is_empty())
466                    .unwrap_or(false)
467            });
468            if !produces_output {
469                continue;
470            }
471
472            let ext_dir = find_common_dir(&matching, project_root);
473            let globs: Vec<String> = ext.extensions().iter().map(|e| format!("*{e}")).collect();
474
475            cases.push(BenchCase {
476                name: format!("{ext_name}-extract"),
477                task: format!("Extract {ext_name} annotations from source"),
478                baselines: vec![BaselineDef::Cat {
479                    path: ext_dir.clone(),
480                    globs,
481                }],
482                aql: AqlDef::Extract {
483                    extractor: ext_name.to_string(),
484                    path: ext_dir,
485                },
486            });
487
488            // One non-test extractor case is sufficient for a representative benchmark.
489            break;
490        }
491    }
492
493    cases
494}
495
/// Find common directory prefix from a list of path strings.
///
/// Paths are compared segment by segment on `/`. The last segment of the
/// first path is treated as its file name and never becomes part of the
/// result, so a single path yields its parent directory. Returns an empty
/// string for an empty list or when the paths share no leading directory.
fn find_common_dir_from_paths(paths: &[&str]) -> String {
    let (first, rest) = match paths.split_first() {
        Some(split) => split,
        None => return String::new(),
    };

    let first_parts: Vec<&str> = first.split('/').collect();
    // Only the directory segments of the first path are candidates.
    let dir_parts = &first_parts[..first_parts.len().saturating_sub(1)];

    // Each remaining path is split once and can only shrink the shared depth.
    let common_depth = rest.iter().fold(dir_parts.len(), |depth, path| {
        path.split('/')
            .zip(dir_parts[..depth].iter().copied())
            .take_while(|(theirs, ours)| theirs == ours)
            .count()
    });

    dir_parts[..common_depth].join("/")
}
521
522/// Find the common directory prefix for a set of files, relative to project root.
523fn find_common_dir(files: &[&std::path::PathBuf], project_root: &ProjectRoot) -> String {
524    if files.is_empty() {
525        return String::new();
526    }
527
528    let relatives: Vec<String> = files
529        .iter()
530        .filter_map(|f| {
531            f.strip_prefix(project_root.as_ref())
532                .ok()
533                .map(|r| r.to_string_lossy().to_string())
534        })
535        .collect();
536
537    if relatives.is_empty() {
538        return String::new();
539    }
540
541    let refs: Vec<&str> = relatives.iter().map(|s| s.as_str()).collect();
542    find_common_dir_from_paths(&refs)
543}
544
545// ── Runner ───────────────────────────────────────────────────────────────
546
547/// Execute all benchmark cases and collect results.
548///
549/// Each baseline is executed as a real shell command via `sh -c`, like
550/// hyperfine. AQL approaches run in-process. Output bytes are measured
551/// from stdout.
552pub fn run_bench(
553    project_root: &ProjectRoot,
554    cases: &[BenchCase],
555    registry: &ExtractorRegistry,
556    resolvers: &ResolverRegistry,
557) -> Vec<BenchResult> {
558    cases
559        .iter()
560        .map(|case| run_single_case(project_root, case, registry, resolvers))
561        .collect()
562}
563
564fn run_single_case(
565    project_root: &ProjectRoot,
566    case: &BenchCase,
567    registry: &ExtractorRegistry,
568    resolvers: &ResolverRegistry,
569) -> BenchResult {
570    // Task prompt tokens: what the agent receives as the task description.
571    // Simulate: "Implement: {task}\nFiles: {file list}\n"
572    let task_tokens = case.task.len() / 4;
573
574    let mut baselines: Vec<MeasuredOutput> = case
575        .baselines
576        .iter()
577        .map(|b| measure_baseline(project_root, b))
578        .collect();
579
580    // Fill total_tokens = task_tokens + tool output tokens
581    for b in &mut baselines {
582        b.total_tokens = task_tokens + b.tokens;
583    }
584
585    let mut aql = measure_aql(project_root, &case.aql, registry, resolvers);
586    aql.total_tokens = task_tokens + aql.tokens;
587
588    let successful_baselines: Vec<&MeasuredOutput> =
589        baselines.iter().filter(|b| b.error.is_none()).collect();
590
591    let best_baseline_total = successful_baselines
592        .iter()
593        .map(|b| b.total_tokens)
594        .filter(|&t| t > 0)
595        .min()
596        .unwrap_or(0);
597
598    let (ratio, winner) = if aql.error.is_some() {
599        (None, "error".to_string())
600    } else if successful_baselines.is_empty() {
601        (None, "n/a".to_string())
602    } else if aql.total_tokens == 0 || best_baseline_total == 0 {
603        (None, "n/a:no-data".to_string())
604    } else {
605        let r = best_baseline_total as f64 / aql.total_tokens as f64;
606        let w = if aql.total_tokens < best_baseline_total {
607            "aql"
608        } else {
609            "baseline"
610        };
611        (Some(r), w.to_string())
612    };
613
614    BenchResult {
615        name: case.name.clone(),
616        task: case.task.clone(),
617        task_tokens,
618        baselines,
619        aql,
620        ratio,
621        winner,
622    }
623}
624
625/// Execute a baseline by running its shell command via `sh -c`.
626fn measure_baseline(project_root: &ProjectRoot, def: &BaselineDef) -> MeasuredOutput {
627    let (approach_type, cmd) = def.to_shell_command(project_root.as_ref());
628    let start = Instant::now();
629    let result = run_shell_command(&cmd, project_root.as_ref());
630    let wall_ms = start.elapsed().as_millis() as u64;
631    match result {
632        Ok(bytes) => MeasuredOutput {
633            approach_type,
634            bytes,
635            tokens: bytes / 4,
636            total_tokens: 0,
637            wall_ms,
638            error: None,
639        },
640        Err(e) => MeasuredOutput {
641            approach_type,
642            bytes: 0,
643            tokens: 0,
644            total_tokens: 0,
645            wall_ms,
646            error: Some(e),
647        },
648    }
649}
650
/// Execute `cmd` with `sh -c` inside `cwd` and return the number of stdout bytes.
///
/// The command runs on a helper thread that reports back over a channel, which
/// lets the caller give up after 30 seconds. On timeout neither the helper
/// thread nor the child process is stopped: both are left running until the
/// command finishes by itself (accepted trade-off for a bench tool).
///
/// A non-zero exit status is an error; the message carries the status and up
/// to 500 characters of stderr when there is any.
fn run_shell_command(cmd: &str, cwd: &Path) -> Result<usize, String> {
    use std::process::Stdio;
    use std::sync::mpsc;
    use std::time::Duration;

    let (sender, receiver) = mpsc::channel();
    let worker_cmd = cmd.to_owned();
    let worker_cwd = cwd.to_path_buf();
    std::thread::spawn(move || {
        let outcome = std::process::Command::new("sh")
            .arg("-c")
            .arg(&worker_cmd)
            .current_dir(&worker_cwd)
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .output();
        // The receiver may already be gone after a timeout; nothing to do then.
        let _ = sender.send(outcome);
    });

    let output = match receiver.recv_timeout(Duration::from_secs(30)) {
        Ok(Ok(output)) => output,
        Ok(Err(e)) => return Err(format!("failed to run command: {e}")),
        Err(_) => {
            // Keep the message short even for very long commands.
            let cmd_label: String = cmd.chars().take(80).collect();
            return Err(format!("command timed out after 30s: {cmd_label}"));
        }
    };

    if output.status.success() {
        return Ok(output.stdout.len());
    }

    let stderr = String::from_utf8_lossy(&output.stderr);
    let snippet: String = stderr.chars().take(500).collect();
    let snippet = snippet.trim();
    if snippet.is_empty() {
        Err(format!("command exited with {}", output.status))
    } else {
        Err(format!("command exited with {}: {}", output.status, snippet))
    }
}
694
695fn measure_aql(
696    project_root: &ProjectRoot,
697    def: &AqlDef,
698    registry: &ExtractorRegistry,
699    resolvers: &ResolverRegistry,
700) -> MeasuredOutput {
701    let start = Instant::now();
702
703    let (approach_type, result) = match def {
704        AqlDef::Extract { extractor, path } => {
705            let r = run_extractor_measure(project_root, extractor, path, registry, resolvers);
706            ("extract".to_string(), r)
707        }
708        AqlDef::NavSelect { path, selector } => {
709            let r = if path.contains('\n') {
710                run_nav_select_multi(project_root, path, selector)
711            } else {
712                run_nav_select_measure(project_root, path, selector)
713            };
714            ("nav-select".to_string(), r)
715        }
716        AqlDef::Query { selector, scope } => {
717            let r = run_query_measure(
718                project_root,
719                selector,
720                &Scope::from(scope.as_str()),
721                registry,
722                resolvers,
723            );
724            ("query".to_string(), r)
725        }
726    };
727
728    let wall_ms = start.elapsed().as_millis() as u64;
729
730    match result {
731        Ok(bytes) => MeasuredOutput {
732            approach_type,
733            bytes,
734            tokens: bytes / 4,
735            total_tokens: 0,
736            wall_ms,
737            error: None,
738        },
739        Err(e) => MeasuredOutput {
740            approach_type,
741            bytes: 0,
742            tokens: 0,
743            total_tokens: 0,
744            wall_ms,
745            error: Some(e),
746        },
747    }
748}
749
750/// Run extractor and measure serialized output bytes.
751fn run_extractor_measure(
752    project_root: &ProjectRoot,
753    extractor_name: &str,
754    path: &str,
755    registry: &ExtractorRegistry,
756    resolvers: &ResolverRegistry,
757) -> Result<usize, String> {
758    let builtin = match registry.get(extractor_name) {
759        Some(e) => e,
760        None => return Err(format!("Unknown extractor: {extractor_name}")),
761    };
762
763    let path_scope = Scope::from(path);
764    let source_files = glob_source_files(project_root, &path_scope, resolvers);
765
766    if !path.is_empty() && source_files.is_empty() {
767        return Err(format!("no source files found at path: {path}"));
768    }
769
770    let ext_set: rustc_hash::FxHashSet<&str> = builtin
771        .extensions()
772        .iter()
773        .map(|e| e.trim_start_matches('.'))
774        .collect();
775
776    let mut all_annotations = Vec::new();
777    for file in &source_files {
778        let ext = file.extension().and_then(|e| e.to_str()).unwrap_or("");
779        if !ext_set.contains(ext) {
780            continue;
781        }
782
783        let relative = file
784            .strip_prefix(project_root.as_ref())
785            .map(|r| RelativePath::from(r.to_string_lossy().as_ref()))
786            .unwrap_or_else(|_| RelativePath::from(file.to_string_lossy().as_ref()));
787
788        if let Ok(source) = std::fs::read_to_string(file) {
789            let annotations = builtin.extract(&source, &relative);
790            all_annotations.extend(annotations);
791        }
792    }
793
794    let output = serde_json::json!({ "annotations": all_annotations });
795    serde_json::to_string(&output)
796        .map(|s| s.len())
797        .map_err(|e| format!("serialization failed: {e}"))
798}
799
800/// Run nav-select on a single file and measure serialized output bytes.
801fn run_nav_select_measure(
802    project_root: &ProjectRoot,
803    path: &str,
804    selector: &str,
805) -> Result<usize, String> {
806    let abs = project_root.as_ref().join(path);
807    if !abs.is_file() {
808        return Err(format!("file not found: {path}"));
809    }
810    let rel = RelativePath::from(path);
811    let mut total_bytes = 0;
812
813    for kind in selector.split(',') {
814        let kind = kind.trim();
815        if kind.is_empty() {
816            continue;
817        }
818        let result = navigate::select(project_root, &rel, None, kind).map_err(|e| e.to_string())?;
819        let json =
820            serde_json::to_string(&result).map_err(|e| format!("serialization failed: {e}"))?;
821        total_bytes += json.len();
822    }
823    Ok(total_bytes)
824}
825
826/// Run nav-select across multiple files (newline-separated paths).
827fn run_nav_select_multi(
828    project_root: &ProjectRoot,
829    paths: &str,
830    selector: &str,
831) -> Result<usize, String> {
832    let mut total_bytes = 0;
833    for path in paths.lines() {
834        let path = path.trim();
835        if path.is_empty() {
836            continue;
837        }
838        total_bytes += run_nav_select_measure(project_root, path, selector)?;
839    }
840    Ok(total_bytes)
841}
842
843/// Run unified query and measure serialized output bytes.
844///
845/// Runs all built-in extractors to populate annotations even when no
846/// manifest exists. This measures what an agent would see if AQL was
847/// configured for the project.
848fn run_query_measure(
849    project_root: &ProjectRoot,
850    selector: &str,
851    scope: &Scope,
852    registry: &ExtractorRegistry,
853    resolvers: &ResolverRegistry,
854) -> Result<usize, String> {
855    let mut cache = crate::code_cache::CodeCache::new(project_root);
856    let mut store = AnnotationStore::new(project_root);
857    store.load_all_from_locator();
858
859    // Load from manifest if available
860    let manifest_path = project_root.as_ref().join(".config").join("aql.schema");
861    if manifest_path.is_file() {
862        if let Ok(raw) = std::fs::read_to_string(&manifest_path) {
863            if let Ok(manifest) = crate::manifest::parse_manifest(&raw) {
864                let results =
865                    crate::extractor::run_all_extractors(&manifest, project_root, registry);
866                for result in results {
867                    if !result.annotations.is_empty() {
868                        store.load_extractor_output(result.annotations);
869                    }
870                }
871            }
872        }
873    }
874
875    // Run all built-in extractors on scope to ensure annotations exist
876    let source_files = glob_source_files(project_root, scope, resolvers);
877
878    if !scope.is_empty() && source_files.is_empty() {
879        return Err(format!("no source files found in scope: {scope}"));
880    }
881    // Deduplicate: registry.names() includes aliases; filter by canonical extractor name.
882    let mut seen_ext_names: rustc_hash::FxHashSet<&str> = rustc_hash::FxHashSet::default();
883    for registry_key in registry.names() {
884        if let Some(ext) = registry.get(registry_key) {
885            let canonical = ext.name();
886            if !seen_ext_names.insert(canonical) {
887                continue;
888            }
889            let ext_set: rustc_hash::FxHashSet<&str> = ext
890                .extensions()
891                .iter()
892                .map(|e| e.trim_start_matches('.'))
893                .collect();
894            for file in &source_files {
895                let file_ext = file.extension().and_then(|e| e.to_str()).unwrap_or("");
896                if !ext_set.contains(file_ext) {
897                    continue;
898                }
899                let relative = file
900                    .strip_prefix(project_root.as_ref())
901                    .map(|r| RelativePath::from(r.to_string_lossy().as_ref()))
902                    .unwrap_or_else(|_| RelativePath::from(file.to_string_lossy().as_ref()));
903                if let Ok(source) = std::fs::read_to_string(file) {
904                    let annotations = ext.extract(&source, &relative);
905                    if !annotations.is_empty() {
906                        store.load_extractor_output(annotations);
907                    }
908                }
909            }
910        }
911    }
912
913    let results =
914        crate::query::unified_query(selector, scope, &mut cache, &mut store, resolvers, None)
915            .map_err(|e| e.to_string())?;
916    serde_json::to_string(&results)
917        .map(|s| s.len())
918        .map_err(|e| format!("serialization failed: {e}"))
919}
920
921// ── Formatter ────────────────────────────────────────────────────────────
922
/// Detect the terminal column width.
///
/// Reads the `COLUMNS` environment variable. The value is used when it parses
/// as an integer of at least 40; otherwise (unset, not a number, or too small)
/// the width falls back to 120.
fn terminal_width() -> usize {
    const FALLBACK_WIDTH: usize = 120;
    const NARROWEST_ACCEPTED: usize = 40;

    match std::env::var("COLUMNS") {
        Ok(raw) => match raw.trim().parse::<usize>() {
            Ok(columns) if columns >= NARROWEST_ACCEPTED => columns,
            _ => FALLBACK_WIDTH,
        },
        Err(_) => FALLBACK_WIDTH,
    }
}
934
935pub fn format_bench_table(
936    project_name: &str,
937    source_file_count: usize,
938    results: &[BenchResult],
939) -> String {
940    let mut out = String::new();
941    let term_width = terminal_width();
942
943    out.push_str(&format!(
944        "AQL Benchmark — project: {project_name} ({source_file_count} source files)\n\n"
945    ));
946
947    // Column preferred widths (content only, no borders/spaces)
948    // Layout: │ Approach │ Tool output │ Tool tkns │ Agent total │ vs AQL │ Time │
949    // Borders: 2 spaces indent + │ + (col + 2 spaces) * 6 cols + extra │
950    // Fixed overhead per row: 2 (indent) + 1 + (1+1)*6 + 6*1 = 2 + 1 + 12 + 6 = 21, but:
951    // Actually: "  │ {col} │ {col} │ ... │\n" → 2 indent + (3 + col_w) * 6 + 1
952    // Separator overhead = 2 + 1 + 6*(3+col_w) + 1 — compute dynamically.
953
954    const MIN_APPROACH: usize = 6;
955    const MIN_TOOL_OUT: usize = 8;
956    const MIN_TOOL_TKNS: usize = 6;
957    const MIN_AGENT: usize = 8;
958
959    const PREF_APPROACH: usize = 10;
960    const PREF_TOOL_OUT: usize = 12;
961    const PREF_TOOL_TKNS: usize = 10;
962    const PREF_AGENT: usize = 12;
963    const PREF_VS_AQL: usize = 8;
964    const PREF_TIME: usize = 8;
965
966    // Fixed separator overhead: "  │ " + " │ " between cols (5 per col) + " │\n"
967    // 6 cols → 2 + 1 + 6*(1 + col + 1 + 1) + 1 = 2 + 1 + 6*3 + sum(cols) + 1
968    // = 22 + sum(cols)  — simplify: border_overhead = 2 + 1 + 6*3 + 1 = 22
969    let border_overhead: usize = 22; // 2 indent + pipes and spaces for 6 cols
970    let available = term_width.saturating_sub(border_overhead);
971
972    // Distribute available width to columns, compressing in priority order
973    let total_pref =
974        PREF_APPROACH + PREF_TOOL_OUT + PREF_TOOL_TKNS + PREF_AGENT + PREF_VS_AQL + PREF_TIME;
975    let (w_approach, w_tool_out, w_tool_tkns, w_agent, w_vs_aql, w_time) = if available
976        >= total_pref
977    {
978        (
979            PREF_APPROACH,
980            PREF_TOOL_OUT,
981            PREF_TOOL_TKNS,
982            PREF_AGENT,
983            PREF_VS_AQL,
984            PREF_TIME,
985        )
986    } else {
987        // Compress tool_out and tool_tkns first, then approach
988        let fixed = PREF_AGENT + PREF_VS_AQL + PREF_TIME;
989        let compressible = available.saturating_sub(fixed + MIN_APPROACH);
990        let w_tool_out = (compressible / 2).clamp(MIN_TOOL_OUT, PREF_TOOL_OUT);
991        let w_tool_tkns = (compressible / 2).clamp(MIN_TOOL_TKNS, PREF_TOOL_TKNS);
992        let remaining = available.saturating_sub(fixed + w_tool_out + w_tool_tkns);
993        let w_approach = remaining.clamp(MIN_APPROACH, PREF_APPROACH);
994        (
995            w_approach,
996            w_tool_out,
997            w_tool_tkns,
998            PREF_AGENT.min(
999                available
1000                    .saturating_sub(w_approach + w_tool_out + w_tool_tkns + PREF_VS_AQL + PREF_TIME)
1001                    .max(MIN_AGENT),
1002            ),
1003            PREF_VS_AQL,
1004            PREF_TIME,
1005        )
1006    };
1007
1008    // Box drawing helpers
1009    let top = format!(
1010        "  ┌{a}┬{b}┬{c}┬{d}┬{e}┬{f}┐",
1011        a = "─".repeat(w_approach + 2),
1012        b = "─".repeat(w_tool_out + 2),
1013        c = "─".repeat(w_tool_tkns + 2),
1014        d = "─".repeat(w_agent + 2),
1015        e = "─".repeat(w_vs_aql + 2),
1016        f = "─".repeat(w_time + 2),
1017    );
1018    let mid = top.replace('┌', "├").replace('┐', "┤").replace('┬', "┼");
1019    let sep = top.replace('┌', "├").replace('┐', "┤").replace('┬', "┼");
1020    let bot = top.replace('┌', "└").replace('┐', "┘").replace('┬', "┴");
1021
1022    let header = format!(
1023        "  │ {:<wa$} │ {:<wb$} │ {:<wc$} │ {:<wd$} │ {:<we$} │ {:<wf$} │",
1024        "Approach",
1025        "Tool output",
1026        "Tool tkns",
1027        "Agent total",
1028        "vs AQL",
1029        "Time",
1030        wa = w_approach,
1031        wb = w_tool_out,
1032        wc = w_tool_tkns,
1033        wd = w_agent,
1034        we = w_vs_aql,
1035        wf = w_time,
1036    );
1037
1038    for r in results {
1039        out.push_str(&format!("  {}: \"{}\"\n", r.name, r.task));
1040        if r.task_tokens > 0 {
1041            out.push_str(&format!(
1042                "  Task prompt: {} tokens\n",
1043                format_number(r.task_tokens)
1044            ));
1045        }
1046        out.push_str(&format!("{top}\n"));
1047        out.push_str(&format!("{header}\n"));
1048        out.push_str(&format!("{mid}\n"));
1049
1050        let aql_total = if r.aql.error.is_none() && r.aql.total_tokens > 0 {
1051            Some(r.aql.total_tokens)
1052        } else {
1053            None
1054        };
1055
1056        let mut footnotes: Vec<String> = Vec::new();
1057
1058        for b in &r.baselines {
1059            let output_col = if let Some(ref e) = b.error {
1060                let idx = footnotes.len() + 1;
1061                footnotes.push(format!("† {}: {e}", b.approach_type));
1062                format!("ERR†{idx}")
1063            } else {
1064                format_number(b.bytes)
1065            };
1066            let tkns_col = if b.error.is_some() {
1067                "—".to_string()
1068            } else {
1069                format_number(b.tokens)
1070            };
1071            let agent_col = if b.error.is_some() {
1072                "—".to_string()
1073            } else {
1074                format_number(b.total_tokens)
1075            };
1076            let vs_col = if b.error.is_some() {
1077                "—".to_string()
1078            } else if let Some(aql_t) = aql_total {
1079                if aql_t > 0 {
1080                    format!("{:.1}×", b.total_tokens as f64 / aql_t as f64)
1081                } else {
1082                    "—".to_string()
1083                }
1084            } else {
1085                "—".to_string()
1086            };
1087            out.push_str(&format!(
1088                "  │ {:<wa$} │ {:<wb$} │ {:<wc$} │ {:<wd$} │ {:<we$} │ {:<wf$} │\n",
1089                truncate(&b.approach_type, w_approach),
1090                output_col,
1091                tkns_col,
1092                agent_col,
1093                vs_col,
1094                format_ms(b.wall_ms),
1095                wa = w_approach,
1096                wb = w_tool_out,
1097                wc = w_tool_tkns,
1098                wd = w_agent,
1099                we = w_vs_aql,
1100                wf = w_time,
1101            ));
1102        }
1103
1104        out.push_str(&format!("{sep}\n"));
1105
1106        let aql_output_col = if let Some(ref e) = r.aql.error {
1107            let idx = footnotes.len() + 1;
1108            footnotes.push(format!("† aql: {e}"));
1109            format!("ERR†{idx}")
1110        } else {
1111            format_number(r.aql.bytes)
1112        };
1113
1114        out.push_str(&format!(
1115            "  │ {:<wa$} │ {:<wb$} │ {:<wc$} │ {:<wd$} │ {:<we$} │ {:<wf$} │\n",
1116            "aql",
1117            aql_output_col,
1118            if r.aql.error.is_some() {
1119                "—".to_string()
1120            } else {
1121                format_number(r.aql.tokens)
1122            },
1123            if r.aql.error.is_some() {
1124                "—".to_string()
1125            } else {
1126                format_number(r.aql.total_tokens)
1127            },
1128            "—",
1129            format_ms(r.aql.wall_ms),
1130            wa = w_approach,
1131            wb = w_tool_out,
1132            wc = w_tool_tkns,
1133            wd = w_agent,
1134            we = w_vs_aql,
1135            wf = w_time,
1136        ));
1137
1138        out.push_str(&format!("{bot}\n"));
1139
1140        for note in &footnotes {
1141            out.push_str(&format!("  {note}\n"));
1142        }
1143
1144        // Winner line
1145        let winner_line = match r.winner.as_str() {
1146            "error" => "  Winner: error (AQL measurement failed)".to_string(),
1147            "n/a" => "  Winner: n/a (all baselines failed)".to_string(),
1148            "n/a:no-data" => "  Winner: n/a (no output produced)".to_string(),
1149            "aql" => {
1150                if let Some(ratio) = r.ratio {
1151                    format!("  Winner: aql ({ratio:.1}× fewer agent tokens vs best baseline)")
1152                } else {
1153                    "  Winner: aql".to_string()
1154                }
1155            }
1156            _ => {
1157                if let Some(ratio) = r.ratio {
1158                    format!(
1159                        "  Winner: baseline ({:.1}× fewer agent tokens vs aql)",
1160                        1.0 / ratio
1161                    )
1162                } else {
1163                    "  Winner: baseline".to_string()
1164                }
1165            }
1166        };
1167        out.push_str(&format!("{winner_line}\n"));
1168
1169        out.push('\n');
1170    }
1171
1172    out
1173}
1174
/// Render `n` in decimal with a comma between each group of three digits,
/// counted from the right (e.g. `1234567` becomes `"1,234,567"`).
fn format_number(n: usize) -> String {
    let digits = n.to_string();
    let digit_count = digits.len();
    let mut grouped = String::with_capacity(digit_count + digit_count / 3);

    for (position, digit) in digits.chars().enumerate() {
        // A comma goes before any digit that has a multiple of three digits
        // remaining (itself included), except at the very start.
        let remaining = digit_count - position;
        if position > 0 && remaining % 3 == 0 {
            grouped.push(',');
        }
        grouped.push(digit);
    }

    grouped
}
1187
/// Render a millisecond count for display: whole milliseconds below one
/// second (`"42ms"`), seconds with one decimal place from there on (`"1.5s"`).
fn format_ms(ms: u64) -> String {
    match ms {
        0..=999 => format!("{ms}ms"),
        _ => {
            let seconds = ms as f64 / 1000.0;
            format!("{seconds:.1}s")
        }
    }
}
1196
/// Return the prefix of `s` holding at most `max_chars` characters.
///
/// The cut always lands on a `char` boundary, so multi-byte text is never
/// split mid-character. Strings that are already short enough are returned
/// unchanged.
fn truncate(s: &str, max_chars: usize) -> &str {
    // Byte offset of the first character past the limit, or the full length
    // when the string has no such character.
    let cut = s
        .char_indices()
        .nth(max_chars)
        .map_or(s.len(), |(byte_offset, _)| byte_offset);
    &s[..cut]
}
1204
1205/// Count source files in a project.
1206pub fn count_source_files(project_root: &ProjectRoot, resolvers: &ResolverRegistry) -> usize {
1207    glob_source_files(project_root, &Scope::from(""), resolvers).len()
1208}
1209
1210/// Derive a project name from the project root directory.
1211pub fn project_name(project_root: &ProjectRoot) -> String {
1212    project_root
1213        .as_ref()
1214        .file_name()
1215        .and_then(|n| n.to_str())
1216        .unwrap_or("unknown")
1217        .to_string()
1218}
1219
1220// ── Orchestration ────────────────────────────────────────────────────────
1221
/// Input for a single bench run. Shims construct this from their input format.
///
/// Three modes are possible, chosen by which of `aql` / `config` is set:
/// an explicit AQL operation, an explicit config file, or (when both are
/// `None`) auto-detection.
#[derive(Debug, Clone)]
pub struct BenchRequest {
    /// Explicit AQL operation. Mutually exclusive with `config`; setting both
    /// is rejected by `execute_bench_request`.
    pub aql: Option<AqlDef>,
    /// Explicit path for the AQL op (required for NavSelect, where it must not
    /// be a directory; optional for others, defaulting to `"."`).
    pub path: Option<String>,
    /// Inline baselines to compare against. If empty and `aql` is set, auto-adds Cat on `path`.
    /// When cases come from a config file or auto-detection instead, these are
    /// appended to every case's own baselines.
    pub baselines: Vec<BaselineDef>,
    /// Path to a bench config such as `.config/aql.bench`, resolved against the
    /// project root unless it is absolute. Mutually exclusive with `aql`.
    pub config: Option<String>,
}
1234
/// Output from a bench run.
#[derive(Debug, Clone, Serialize)]
pub struct BenchResponse {
    /// Project display name, as produced by `project_name`.
    pub project: String,
    /// Number of source files in the project, as produced by `count_source_files`.
    pub source_files: usize,
    /// One measured result per benchmark case that was run.
    pub cases: Vec<BenchResult>,
}
1242
1243/// Execute a bench request end-to-end: validate, build cases, run, return results.
1244///
1245/// All orchestration logic lives here; shims only parse their input format and
1246/// format output.
1247pub fn execute_bench_request(
1248    root: &ProjectRoot,
1249    req: BenchRequest,
1250    registry: &ExtractorRegistry,
1251    resolvers: &ResolverRegistry,
1252) -> Result<BenchResponse, AqlError> {
1253    // Validate mutual exclusivity
1254    if req.aql.is_some() && req.config.is_some() {
1255        return Err("aql operation and config are mutually exclusive".into());
1256    }
1257
1258    // Validate NavSelect requirements
1259    if let Some(AqlDef::NavSelect { .. }) = &req.aql {
1260        match &req.path {
1261            None => return Err("nav_select requires a path (file)".into()),
1262            Some(p) if root.as_ref().join(p).is_dir() => {
1263                return Err(format!("nav_select requires a file path, not a directory: {p}").into())
1264            }
1265            _ => {}
1266        }
1267    }
1268
1269    // Build cases
1270    let cases: Vec<BenchCase> = if let Some(aql_def) = req.aql {
1271        let path = req.path.as_deref().unwrap_or(".").to_string();
1272        let (case_name, task) = match &aql_def {
1273            AqlDef::Extract { extractor, .. } => (
1274                format!("extract:{extractor}"),
1275                format!("Extract {extractor} annotations from {path}"),
1276            ),
1277            AqlDef::NavSelect { selector, .. } => (
1278                format!("nav-select:{selector}"),
1279                format!("Select {selector} nodes in {path}"),
1280            ),
1281            AqlDef::Query { selector, .. } => (
1282                format!("query:{selector}"),
1283                format!("Query '{selector}' in {path}"),
1284            ),
1285        };
1286        let baselines = if req.baselines.is_empty() {
1287            vec![BaselineDef::Cat {
1288                path: path.clone(),
1289                globs: Vec::new(),
1290            }]
1291        } else {
1292            req.baselines
1293        };
1294        vec![BenchCase {
1295            name: case_name,
1296            task,
1297            baselines,
1298            aql: aql_def,
1299        }]
1300    } else if let Some(config_rel) = req.config {
1301        let config_abs = if std::path::Path::new(&config_rel).is_absolute() {
1302            std::path::PathBuf::from(&config_rel)
1303        } else {
1304            root.as_ref().join(&config_rel)
1305        };
1306        let raw = std::fs::read_to_string(&config_abs)
1307            .map_err(|e| format!("Failed to read config {}: {e}", config_abs.display()))?;
1308        let mut parsed_cases = parse_bench_config(&raw)?.cases;
1309        for case in &mut parsed_cases {
1310            case.baselines.extend(req.baselines.iter().cloned());
1311        }
1312        parsed_cases
1313    } else {
1314        // Auto-detect: try .config/aql.bench first, then structural scan
1315        let default_config = root.as_ref().join(".config").join("aql.bench");
1316        let mut detected = if default_config.is_file() {
1317            let raw = std::fs::read_to_string(&default_config)
1318                .map_err(|e| format!("Failed to read {}: {e}", default_config.display()))?;
1319            parse_bench_config(&raw)?.cases
1320        } else {
1321            auto_detect_cases(root, registry, resolvers)
1322        };
1323        for case in &mut detected {
1324            case.baselines.extend(req.baselines.iter().cloned());
1325        }
1326        detected
1327    };
1328
1329    if cases.is_empty() {
1330        return Err(
1331            "no benchmark cases found — use --extract, --query, --nav-select, or add a .config/aql.bench file".into(),
1332        );
1333    }
1334
1335    for case in &cases {
1336        let mut seen: rustc_hash::FxHashSet<&str> = rustc_hash::FxHashSet::default();
1337        for b in &case.baselines {
1338            let name = match b {
1339                BaselineDef::Cat { .. } => "cat",
1340                BaselineDef::Command { name, .. } => name.as_str(),
1341            };
1342            if !seen.insert(name) {
1343                return Err(
1344                    format!("case '{}': duplicate baseline name '{name}'", case.name).into(),
1345                );
1346            }
1347        }
1348    }
1349
1350    let results = run_bench(root, &cases, registry, resolvers);
1351    Ok(BenchResponse {
1352        project: project_name(root),
1353        source_files: count_source_files(root, resolvers),
1354        cases: results,
1355    })
1356}
1357
1358// ── Tests ────────────────────────────────────────────────────────────────
1359
1360#[cfg(test)]
1361mod tests {
1362    use super::*;
1363
1364    const SAMPLE_CONFIG: &str = r#"
1365<benchmarks>
1366  <case name="test-structure" task="List all test suites with nesting">
1367    <baseline type="cat" path="src/__tests__/" globs="*.test.*,*.spec.*" />
1368    <aql type="extract" extractor="test" path="src/__tests__/" />
1369  </case>
1370  <case name="custom-grep" task="Find describe blocks">
1371    <baseline type="command" name="sg" cmd="sg -p 'describe($$$)' src/" />
1372    <aql type="query" selector="describe" scope="src/" />
1373  </case>
1374  <case name="surgical-read" task="Read only function declarations">
1375    <baseline type="cat" path="src/main.ts" />
1376    <aql type="nav-select" path="src/main.ts" selector="function_declaration" />
1377  </case>
1378</benchmarks>
1379"#;
1380
1381    #[test]
1382    fn parses_bench_config_with_multiple_cases() {
1383        // Act
1384        let config = parse_bench_config(SAMPLE_CONFIG).unwrap();
1385
1386        // Assert
1387        assert_eq!(config.cases.len(), 3, "should parse all three cases");
1388        assert_eq!(
1389            config.cases[0].baselines.len(),
1390            1,
1391            "first case should have one baseline"
1392        );
1393        assert_eq!(config.cases[0].name, "test-structure", "first case name");
1394    }
1395
1396    #[test]
1397    fn parses_baseline_cat() {
1398        // Arrange and Act
1399        let config = parse_bench_config(SAMPLE_CONFIG).unwrap();
1400
1401        // Assert
1402        match &config.cases[0].baselines[0] {
1403            BaselineDef::Cat { path, globs } => {
1404                assert_eq!(path, "src/__tests__/", "cat path");
1405                assert_eq!(globs, &["*.test.*", "*.spec.*"], "cat globs");
1406            }
1407            other => panic!("expected Cat, got {other:?}"),
1408        }
1409    }
1410
1411    #[test]
1412    fn parses_aql_extract() {
1413        // Arrange and Act
1414        let config = parse_bench_config(SAMPLE_CONFIG).unwrap();
1415
1416        // Assert
1417        match &config.cases[0].aql {
1418            AqlDef::Extract { extractor, path } => {
1419                assert_eq!(extractor, "test", "extractor name");
1420                assert_eq!(path, "src/__tests__/", "extractor path");
1421            }
1422            other => panic!("expected Extract, got {other:?}"),
1423        }
1424    }
1425
1426    #[test]
1427    fn parses_command_baseline_in_case() {
1428        // Arrange and Act
1429        let config = parse_bench_config(SAMPLE_CONFIG).unwrap();
1430
1431        // Assert
1432        match &config.cases[1].baselines[0] {
1433            BaselineDef::Command { name, cmd } => {
1434                assert_eq!(name, "sg", "command name");
1435                assert_eq!(cmd, "sg -p 'describe($$$)' src/", "command string");
1436            }
1437            other => panic!("expected Command, got {other:?}"),
1438        }
1439    }
1440
1441    #[test]
1442    fn parses_aql_nav_select() {
1443        // Arrange and Act
1444        let config = parse_bench_config(SAMPLE_CONFIG).unwrap();
1445
1446        // Assert
1447        match &config.cases[2].aql {
1448            AqlDef::NavSelect { path, selector } => {
1449                assert_eq!(path, "src/main.ts", "nav-select path");
1450                assert_eq!(selector, "function_declaration", "nav-select selector");
1451            }
1452            other => panic!("expected NavSelect, got {other:?}"),
1453        }
1454    }
1455
1456    #[test]
1457    fn parses_aql_query() {
1458        // Arrange and Act
1459        let config = parse_bench_config(SAMPLE_CONFIG).unwrap();
1460
1461        // Assert
1462        match &config.cases[1].aql {
1463            AqlDef::Query { selector, scope } => {
1464                assert_eq!(selector, "describe", "query selector");
1465                assert_eq!(scope, "src/", "query scope");
1466            }
1467            other => panic!("expected Query, got {other:?}"),
1468        }
1469    }
1470
1471    #[test]
1472    fn rejects_empty_config() {
1473        // Act
1474        let err = parse_bench_config("<benchmarks></benchmarks>").unwrap_err();
1475
1476        // Assert
1477        assert!(
1478            err.to_string().contains("no <case> elements"),
1479            "should reject empty config"
1480        );
1481    }
1482
1483    #[test]
1484    fn rejects_case_without_baseline() {
1485        // Arrange
1486        let xml = r#"
1487<benchmarks>
1488  <case name="x" task="y">
1489    <aql type="extract" extractor="test" path="." />
1490  </case>
1491</benchmarks>
1492"#;
1493
1494        // Act
1495        let err = parse_bench_config(xml).unwrap_err();
1496
1497        // Assert
1498        assert!(
1499            err.to_string().contains("missing <baseline>"),
1500            "should reject case without baseline"
1501        );
1502    }
1503
1504    #[test]
1505    fn rejects_case_without_aql() {
1506        // Arrange
1507        let xml = r#"
1508<benchmarks>
1509  <case name="x" task="y">
1510    <baseline type="cat" path="." />
1511  </case>
1512</benchmarks>
1513"#;
1514
1515        // Act
1516        let err = parse_bench_config(xml).unwrap_err();
1517
1518        // Assert
1519        assert!(
1520            err.to_string().contains("missing <aql>"),
1521            "should reject case without aql"
1522        );
1523    }
1524
1525    #[test]
1526    fn formats_number_with_commas() {
1527        // Assert
1528        assert_eq!(format_number(0), "0", "zero");
1529        assert_eq!(format_number(999), "999", "three digits");
1530        assert_eq!(format_number(1_000), "1,000", "four digits");
1531        assert_eq!(format_number(136_380), "136,380", "six digits");
1532        assert_eq!(format_number(1_000_000), "1,000,000", "seven digits");
1533    }
1534
1535    #[test]
1536    fn formats_bench_table_multi_baseline() {
1537        // Arrange
1538        let results = vec![BenchResult {
1539            name: "test-structure".to_string(),
1540            task: "List all test suites".to_string(),
1541            task_tokens: 5,
1542            baselines: vec![
1543                MeasuredOutput {
1544                    approach_type: "cat".to_string(),
1545                    bytes: 136_380,
1546                    tokens: 34_095,
1547                    total_tokens: 34_100,
1548                    wall_ms: 12,
1549                    error: None,
1550                },
1551                MeasuredOutput {
1552                    approach_type: "grep".to_string(),
1553                    bytes: 20_000,
1554                    tokens: 5_000,
1555                    total_tokens: 5_005,
1556                    wall_ms: 8,
1557                    error: None,
1558                },
1559                MeasuredOutput {
1560                    approach_type: "ast-grep".to_string(),
1561                    bytes: 36_000,
1562                    tokens: 9_000,
1563                    total_tokens: 9_005,
1564                    wall_ms: 45,
1565                    error: None,
1566                },
1567            ],
1568            aql: MeasuredOutput {
1569                approach_type: "extract".to_string(),
1570                bytes: 21_667,
1571                tokens: 5_416,
1572                total_tokens: 5_421,
1573                wall_ms: 26,
1574                error: None,
1575            },
1576            ratio: Some(0.92),
1577            winner: "baseline".to_string(),
1578        }];
1579
1580        // Act
1581        let table = format_bench_table("testproj", 42, &results);
1582
1583        // Assert
1584        assert!(table.contains("testproj"), "should contain project name");
1585        assert!(
1586            table.contains("42 source files"),
1587            "should contain file count"
1588        );
1589        assert!(table.contains("cat"), "should show cat baseline");
1590        assert!(table.contains("grep"), "should show grep baseline");
1591        assert!(table.contains("ast-grep"), "should show ast-grep baseline");
1592        assert!(table.contains("aql"), "should show AQL");
1593        assert!(table.contains("34,095"), "should show cat tokens");
1594        assert!(table.contains("5,416"), "should show AQL tokens");
1595    }
1596
1597    #[test]
1598    fn shell_escape_works() {
1599        // Assert
1600        assert_eq!(shell_escape("hello"), "'hello'", "simple string");
1601        assert_eq!(
1602            shell_escape("it's"),
1603            "'it'\\''s'",
1604            "string with single quote"
1605        );
1606    }
1607
1608    #[test]
1609    fn globs_to_find_names_works() {
1610        // Arrange
1611        let globs = vec!["*.test.*".to_string(), "*.spec.*".to_string()];
1612
1613        // Act
1614        let result = globs_to_find_names(&globs);
1615
1616        // Assert
1617        assert!(result.contains("-name '*.test.*'"), "first glob");
1618        assert!(
1619            result.contains("-o -name '*.spec.*'"),
1620            "second glob with -o"
1621        );
1622    }
1623
1624    #[test]
1625    fn baseline_cat_to_command() {
1626        // Arrange
1627        let def = BaselineDef::Cat {
1628            path: "src/main.ts".to_string(),
1629            globs: Vec::new(),
1630        };
1631
1632        // Act
1633        let (name, cmd) = def.to_shell_command(Path::new("/tmp/proj"));
1634
1635        // Assert
1636        assert_eq!(name, "cat", "approach name");
1637        assert!(cmd.contains("cat"), "should use cat");
1638        assert!(cmd.contains("src/main.ts"), "should include path");
1639    }
1640
1641    #[test]
1642    fn baseline_command_passthrough() {
1643        // Arrange
1644        let def = BaselineDef::Command {
1645            name: "ripgrep".to_string(),
1646            cmd: "rg -n 'fn ' src/".to_string(),
1647        };
1648
1649        // Act
1650        let (name, cmd) = def.to_shell_command(Path::new("/tmp/proj"));
1651
1652        // Assert
1653        assert_eq!(name, "ripgrep", "custom name");
1654        assert_eq!(cmd, "rg -n 'fn ' src/", "command passthrough");
1655    }
1656
1657    #[test]
1658    fn parses_command_baseline() {
1659        // Arrange
1660        let xml = r#"
1661<benchmarks>
1662  <case name="custom" task="Custom grep">
1663    <baseline type="command" name="ripgrep" cmd="rg -n 'fn ' src/" />
1664    <aql type="extract" extractor="test" path="src/" />
1665  </case>
1666</benchmarks>
1667"#;
1668
1669        // Act
1670        let config = parse_bench_config(xml).unwrap();
1671
1672        // Assert
1673        match &config.cases[0].baselines[0] {
1674            BaselineDef::Command { name, cmd } => {
1675                assert_eq!(name, "ripgrep", "command name");
1676                assert_eq!(cmd, "rg -n 'fn ' src/", "command string");
1677            }
1678            other => panic!("expected Command, got {other:?}"),
1679        }
1680    }
1681
1682    #[test]
1683    fn common_dir_from_paths() {
1684        // Arrange
1685        let paths = vec!["src/foo/a.ts", "src/foo/b.ts", "src/foo/bar/c.ts"];
1686
1687        // Act
1688        let common = find_common_dir_from_paths(&paths);
1689
1690        // Assert
1691        assert_eq!(common, "src/foo", "should find common prefix");
1692    }
1693
1694    #[test]
1695    fn common_dir_no_overlap() {
1696        // Arrange
1697        let paths = vec!["src/a.ts", "lib/b.ts"];
1698
1699        // Act
1700        let common = find_common_dir_from_paths(&paths);
1701
1702        // Assert
1703        assert_eq!(common, "", "no common prefix");
1704    }
1705
1706    #[test]
1707    fn format_ms_works() {
1708        // Assert
1709        assert_eq!(format_ms(0), "0ms", "zero");
1710        assert_eq!(format_ms(42), "42ms", "under a second");
1711        assert_eq!(format_ms(1500), "1.5s", "over a second");
1712    }
1713
1714    #[test]
1715    fn truncate_works() {
1716        // Assert
1717        assert_eq!(truncate("short", 10), "short", "no truncation needed");
1718        assert_eq!(truncate("a-very-long-name", 10), "a-very-lon", "truncated");
1719    }
1720
1721    #[test]
1722    fn winner_is_na_when_all_baselines_fail() {
1723        // Arrange
1724        let root = crate::types::ProjectRoot::from(std::path::Path::new("/nonexistent_path_xyz"));
1725        let registry = crate::extractor::ExtractorRegistry::with_defaults();
1726        let resolvers = crate::resolver::ResolverRegistry::with_defaults();
1727        let case = BenchCase {
1728            name: "test-fail".to_string(),
1729            task: "test task".to_string(),
1730            baselines: vec![BaselineDef::Command {
1731                name: "fail".to_string(),
1732                cmd: "exit 1".to_string(),
1733            }],
1734            aql: AqlDef::Extract {
1735                extractor: "test".to_string(),
1736                path: "/nonexistent".to_string(),
1737            },
1738        };
1739
1740        // Act
1741        let results = run_bench(&root, &[case], &registry, &resolvers);
1742
1743        // Assert
1744        assert_eq!(results.len(), 1, "should return one result");
1745        let r = &results[0];
1746        assert!(r.baselines[0].error.is_some(), "baseline should have error");
1747        assert!(
1748            r.winner == "n/a" || r.winner == "error" || r.winner == "n/a:no-data",
1749            "winner should be n/a, n/a:no-data, or error when measurement fails, got: {}",
1750            r.winner
1751        );
1752    }
1753
1754    #[test]
1755    fn winner_is_error_when_aql_fails() {
1756        // Arrange
1757        let root = crate::types::ProjectRoot::from(std::path::Path::new("/tmp"));
1758        let registry = crate::extractor::ExtractorRegistry::with_defaults();
1759        let resolvers = crate::resolver::ResolverRegistry::with_defaults();
1760        let case = BenchCase {
1761            name: "test-aql-fail".to_string(),
1762            task: "test task".to_string(),
1763            baselines: vec![BaselineDef::Command {
1764                name: "echo".to_string(),
1765                cmd: "echo hello".to_string(),
1766            }],
1767            aql: AqlDef::Extract {
1768                extractor: "nonexistent_extractor_xyz".to_string(),
1769                path: ".".to_string(),
1770            },
1771        };
1772
1773        // Act
1774        let results = run_bench(&root, &[case], &registry, &resolvers);
1775
1776        // Assert
1777        assert_eq!(results.len(), 1, "should return one result");
1778        let r = &results[0];
1779        assert!(
1780            r.aql.error.is_some(),
1781            "AQL should have error for unknown extractor"
1782        );
1783        assert_eq!(r.winner, "error", "winner should be 'error' when AQL fails");
1784        assert!(r.ratio.is_none(), "ratio should be None when AQL fails");
1785    }
1786
1787    #[test]
1788    fn execute_bench_request_rejects_duplicate_baseline_names() {
1789        // Arrange
1790        let root = crate::types::ProjectRoot::from(std::path::Path::new("/tmp"));
1791        let registry = crate::extractor::ExtractorRegistry::with_defaults();
1792        let resolvers = crate::resolver::ResolverRegistry::with_defaults();
1793        let req = BenchRequest {
1794            aql: Some(AqlDef::Extract {
1795                extractor: "test".to_string(),
1796                path: ".".to_string(),
1797            }),
1798            path: Some(".".to_string()),
1799            baselines: vec![
1800                BaselineDef::Command {
1801                    name: "rg".to_string(),
1802                    cmd: "echo a".to_string(),
1803                },
1804                BaselineDef::Command {
1805                    name: "rg".to_string(),
1806                    cmd: "echo b".to_string(),
1807                },
1808            ],
1809            config: None,
1810        };
1811
1812        // Act
1813        let result = execute_bench_request(&root, req, &registry, &resolvers);
1814
1815        // Assert
1816        assert!(result.is_err(), "should reject duplicate baseline names");
1817        let msg = result.unwrap_err().to_string();
1818        assert!(
1819            msg.contains("duplicate"),
1820            "error should mention 'duplicate', got: {msg}"
1821        );
1822    }
1823
1824    #[test]
1825    fn format_bench_table_shows_no_output_for_zero_tokens() {
1826        // Arrange
1827        let results = vec![BenchResult {
1828            name: "zero-case".to_string(),
1829            task: "task".to_string(),
1830            task_tokens: 0,
1831            baselines: vec![MeasuredOutput {
1832                approach_type: "cat".to_string(),
1833                bytes: 0,
1834                tokens: 0,
1835                total_tokens: 0,
1836                wall_ms: 0,
1837                error: None,
1838            }],
1839            aql: MeasuredOutput {
1840                approach_type: "extract".to_string(),
1841                bytes: 0,
1842                tokens: 0,
1843                total_tokens: 0,
1844                wall_ms: 0,
1845                error: None,
1846            },
1847            ratio: None,
1848            winner: "n/a:no-data".to_string(),
1849        }];
1850
1851        // Act
1852        let table = format_bench_table("test-project", 10, &results);
1853
1854        // Assert
1855        assert!(
1856            table.contains("no output produced"),
1857            "table should explain zero-token n/a, got: {table}"
1858        );
1859    }
1860}