research/
cli.rs

1//! CLI argument parsing + dispatch. Every subcommand in `Commands` resolves to a
2//! handler in `commands::*`. (MVP #1 had 12 subcommands, all returning NOT_IMPLEMENTED.)
3
4use clap::{Parser, Subcommand};
5use std::process::ExitCode;
6
7use crate::commands;
8use crate::output::Envelope;
9
// Top-level argument model for the `research` binary, parsed by `run()` via
// `Cli::parse()`.
//
// The `///` lines on the fields are picked up by clap's derive as the
// user-visible help text, so rewording them changes `--help` output.
10#[derive(Parser, Debug)]
11#[command(
12    name = "research",
13    about = "Research workflow CLI — orchestrate postagent + actionbook for reproducible research sessions",
    // No auto-generated `--version` flag.
14    disable_version_flag = true
15)]
16pub struct Cli {
    // `global = true` on the three flags below lets them appear before or
    // after the subcommand name.
17    /// JSON output (default is plain text)
18    #[arg(long, global = true)]
19    pub json: bool,
20
    // `Count` action: the value is the number of times the flag was given
    // (`-vv` => 2).
21    /// Increase logging verbosity (to stderr)
22    #[arg(long, short = 'v', global = true, action = clap::ArgAction::Count)]
23    pub verbose: u8,
24
25    /// Disable ANSI color in plain-text output
26    #[arg(long, global = true)]
27    pub no_color: bool,
28
    // `Option`: a bare invocation with no subcommand parses successfully;
    // `run()` prints the help text in that case.
29    #[command(subcommand)]
30    pub command: Option<Commands>,
31}
32
// Every top-level subcommand of the `research` binary. `dispatch()` unpacks
// each variant and forwards its fields to the matching `commands::*` handler.
//
// Fields without an `#[arg(...)]` attribute are positional arguments; fields
// with `#[arg(long ...)]` are named flags/options. The `///` lines are consumed
// by clap's derive as help text, so editing them changes `--help` output.
33#[derive(Subcommand, Debug)]
// clap's built-in `help` subcommand is switched off; the explicit `Help`
// variant at the end of this enum takes its place.
34#[command(disable_help_subcommand = true)]
35pub enum Commands {
36    /// Create a new research session and set it active.
37    New {
        // `topic` is the only positional argument of `new`.
38        topic: String,
39        #[arg(long)]
40        preset: Option<String>,
41        #[arg(long)]
42        slug: Option<String>,
43        #[arg(long)]
44        force: bool,
45        /// Fork from a parent session — copies its ## Overview as ## Context.
46        #[arg(long = "from")]
47        from: Option<String>,
48        /// Tag this session (repeatable). Inherited from --from if provided.
49        #[arg(long = "tag", action = clap::ArgAction::Append)]
50        tag: Vec<String>,
51    },
52    /// List all research sessions.
53    List {
54        /// Filter by tag.
55        #[arg(long)]
56        tag: Option<String>,
57        /// Show parent→child hierarchy as an ASCII tree.
58        #[arg(long)]
59        tree: bool,
60    },
61    /// Print a session.md to stdout so an agent can resume context.
62    Show { slug: String },
    // `slug` is an optional positional here and in `Audit`.
63    /// Show counts + timings for the current or given session.
64    Status { slug: Option<String> },
65    /// Inspect session.jsonl as a compact audit trail for hand calls, facts, and synthesis.
66    Audit { slug: Option<String> },
67    /// Audit GitHub repository trust signals.
68    #[command(name = "github-audit")]
69    GithubAudit {
70        repo: String,
        // Free-form string at this layer; no value parser restricts it here.
71        #[arg(long, default_value = "stargazers")]
72        depth: String,
73        #[arg(long, default_value_t = 200)]
74        sample: usize,
75        #[arg(long)]
76        out: Option<String>,
77        #[arg(long)]
78        html: Option<String>,
79    },
80    /// Set a session active again and print its session.md + recent events.
81    Resume { slug: String },
82    /// Route + fetch + smell-test a URL and attach to the active session.
83    Add {
84        url: String,
85        #[arg(long)]
86        slug: Option<String>,
87        #[arg(long)]
88        timeout: Option<u64>,
        // `--readable` and `--no-readable` are independent booleans; no
        // conflict is declared here and `dispatch()` forwards both unchanged.
89        #[arg(long)]
90        readable: bool,
91        #[arg(long)]
92        no_readable: bool,
93        /// Override smell-test min body bytes (browser path only).
94        #[arg(long = "min-bytes")]
95        min_bytes: Option<u64>,
        // Passed through as a plain string; "reject"/"warn" is not checked by
        // a value parser at this layer.
96        /// Short-body behavior: "reject" (default) or "warn".
97        #[arg(long = "on-short-body")]
98        on_short_body: Option<String>,
99        /// V2-only: run the inline JS inside the given iframe (OOPIF
100        /// support). Default = top frame. Requires
101        /// `ACTIONBOOK_BACKEND=v2-mcp` (default). Must be >= 0.
102        #[arg(long = "frame-id", allow_hyphen_values = true, value_parser = parse_frame_id)]
103        frame_id: Option<u32>,
104        /// V2-only: JSON array forwarded to the inline JS as `$args`.
105        /// Requires `ACTIONBOOK_BACKEND=v2-mcp` (default). Must be a JSON
106        /// array (empty `[]` is allowed; non-array JSON is rejected).
        // Validated by `parse_run_code_args`, but kept as the raw string.
107        #[arg(long = "run-code-args", value_parser = parse_run_code_args)]
108        run_code_args: Option<String>,
109        /// Force re-fetch of any catalog-seeded wiki manuals (overwrite
110        /// existing pages with fresh `fetched_at`). Default is skip-if-
111        /// exists; see `specs/actionbook-catalog-seed.spec.md`.
112        #[arg(long)]
113        reseed: bool,
114    },
115    /// Bulk-ingest a local file or directory tree as sources.
116    ///
117    /// Walks the path, applies optional --glob include/exclude patterns
118    /// (prefix with `!` to exclude), enforces per-file and per-walk size
119    /// caps, and attaches each accepted file as its own source via the
120    /// same pipeline as `research add file:///...`.
121    #[command(name = "add-local")]
122    AddLocal {
123        /// File or directory to ingest. Accepts `file://`, absolute,
124        /// relative (./x), home-relative (~/x), or bare path.
125        path: String,
126        #[arg(long)]
127        slug: Option<String>,
128        /// Glob pattern (repeatable). Prefix with `!` to exclude.
129        /// Examples: `--glob '**/*.rs'  --glob '!**/test/**'`.
130        /// If omitted, matches all files.
131        #[arg(long = "glob", action = clap::ArgAction::Append)]
132        glob: Vec<String>,
        // The documented defaults for the two caps below are not set via
        // `default_value` here — presumably applied by the handler when the
        // value is `None`; confirm in `commands::add_local`.
133        /// Per-file cap in bytes. Files over this are skipped with a
134        /// `too_large` reason. Default 256 KiB.
135        #[arg(long = "max-file-bytes")]
136        max_file_bytes: Option<u64>,
137        /// Total cap for the whole walk. Walk stops (not truncates)
138        /// when this would be exceeded. Default 2 MiB.
139        #[arg(long = "max-total-bytes")]
140        max_total_bytes: Option<u64>,
141        /// Original online URL represented by the local cache/source note.
142        /// Used for fallback provenance in session.jsonl.
143        #[arg(long = "original-url")]
144        original_url: Option<String>,
145        /// Tool that produced the local fallback artifact, e.g. curl,
146        /// browser-cache, web, manual.
147        #[arg(long = "origin-tool")]
148        origin_tool: Option<String>,
149        /// Human-readable reason for the fallback ingest.
150        #[arg(long = "origin-note")]
151        origin_note: Option<String>,
152    },
153    /// List sources attached to the current or given session.
154    Sources {
155        slug: Option<String>,
156        #[arg(long)]
157        rejected: bool,
158    },
159    /// Route + fetch + smell-test multiple URLs in parallel.
160    Batch {
        // NOTE(review): nothing here marks the list as required, so the
        // "one or more" rule is presumably enforced by the handler — confirm.
161        /// One or more URLs to fetch concurrently.
162        urls: Vec<String>,
163        #[arg(long)]
164        slug: Option<String>,
        // The 1–16 range and the default of 4 are not encoded in this
        // attribute — presumably applied by `commands::batch`; confirm.
165        /// Worker threads (1–16, default 4).
166        #[arg(long)]
167        concurrency: Option<usize>,
168        #[arg(long)]
169        timeout: Option<u64>,
        // Same independent flag pair as on `Add`.
170        #[arg(long)]
171        readable: bool,
172        #[arg(long)]
173        no_readable: bool,
174        /// Override smell-test min body bytes (browser path only).
175        #[arg(long = "min-bytes")]
176        min_bytes: Option<u64>,
177        /// Short-body behavior: "reject" (default) or "warn".
178        #[arg(long = "on-short-body")]
179        on_short_body: Option<String>,
180        /// V2-only: run the inline JS inside the given iframe for every
181        /// URL in the batch. Default = top frame. Requires
182        /// `ACTIONBOOK_BACKEND=v2-mcp` (default). Must be >= 0.
183        #[arg(long = "frame-id", allow_hyphen_values = true, value_parser = parse_frame_id)]
184        frame_id: Option<u32>,
185        /// V2-only: JSON array forwarded to the inline JS as `$args` for
186        /// every URL in the batch (shared, not per-url). Requires
187        /// `ACTIONBOOK_BACKEND=v2-mcp` (default). Must be a JSON array.
188        #[arg(long = "run-code-args", value_parser = parse_run_code_args)]
189        run_code_args: Option<String>,
190        /// Force re-fetch of any catalog-seeded wiki manuals (overwrite
191        /// existing pages). Applies to every URL in the batch. See
192        /// `specs/actionbook-catalog-seed.spec.md`.
193        #[arg(long)]
194        reseed: bool,
195    },
196    /// Synthesize session.md + raw/ into report.json + report.html.
197    Synthesize {
198        slug: Option<String>,
199        #[arg(long)]
200        no_render: bool,
201        #[arg(long)]
202        open: bool,
203        /// Also render Chinese translations next to each English paragraph
204        /// in report.html. Requires a working LLM provider; choose one with
205        /// ASR_BILINGUAL_PROVIDER=claude|codex. Costs tokens proportional
206        /// to report length.
207        #[arg(long)]
208        bilingual: bool,
209        /// Also convert the rendered report.html to report.pdf using
210        /// isolated local Chromium.
211        #[arg(long)]
212        pdf: bool,
        // "Implies --pdf" is implemented in `dispatch()`, which passes
        // `pdf || pdf_output.is_some()` to the handler.
213        /// Explicit PDF output path. Implies --pdf.
214        #[arg(long = "pdf-output")]
215        pdf_output: Option<String>,
216    },
217    /// Run the completion protocol: coverage -> synthesize -> audit.
218    Finish {
        // Unlike `Synthesize`, the slug is a mandatory positional here.
219        slug: String,
220        #[arg(long)]
221        open: bool,
222        #[arg(long)]
223        bilingual: bool,
224    },
225    /// Render an editorial report from a session (rich-html and future formats).
226    Report {
227        slug: Option<String>,
        // Non-optional `String` with no default: `--format` must be supplied.
228        /// Output format. Supported: rich-html, brief-md.
229        #[arg(long)]
230        format: String,
        // `--open` and `--no-open` are both forwarded to the handler as-is.
231        #[arg(long)]
232        open: bool,
233        #[arg(long = "no-open")]
234        no_open: bool,
235        /// (brief-md only) print to stdout instead of writing a file.
236        #[arg(long)]
237        stdout: bool,
238        /// (brief-md only) explicit output path; default: <session>/report-brief.md.
239        #[arg(long)]
240        output: Option<String>,
241    },
242    /// Mark a session closed (files preserved).
243    Close { slug: Option<String> },
244    /// Remove a session directory.
245    Rm {
246        slug: String,
247        #[arg(long)]
248        force: bool,
249    },
250    /// Classify a URL: which executor + command template.
251    Route {
252        url: String,
253        #[arg(long)]
254        prefer: Option<String>,
255        #[arg(long)]
256        rules: Option<String>,
257        #[arg(long)]
258        preset: Option<String>,
259    },
260    /// Generate an HTML index page for all sessions with a given tag.
261    Series {
262        tag: String,
263        #[arg(long)]
264        open: bool,
265    },
266    /// Diff: list sources fetched-but-uncited (unused) and body-but-unfetched (hallucinated).
267    Diff {
268        slug: Option<String>,
269        /// Only list unused sources; omit the hallucinated/missing set.
270        #[arg(long = "unused-only")]
271        unused_only: bool,
272    },
273    /// Coverage: fact-based completeness stats + report_ready blockers.
274    Coverage { slug: Option<String> },
275    /// Verify local prerequisites for the skill/playbooks without creating a session.
276    Doctor {
277        /// Also do a live one-shot LLM provider call. This can spend tokens.
278        #[arg(long = "provider-smoke")]
279        provider_smoke: bool,
280        /// Also exercise postagent/actionbook command surfaces.
281        #[arg(long = "tool-smoke")]
282        tool_smoke: bool,
283        /// Provider to smoke-test: claude | codex | all.
284        #[arg(long = "provider", default_value = "all")]
285        provider: String,
286    },
    // This variant (and its `dispatch()` arm) only exists when the crate is
    // built with the `autoresearch` feature.
287    /// Run the autonomous research loop (feature: autoresearch).
288    #[cfg(feature = "autoresearch")]
289    Loop {
290        slug: Option<String>,
291        /// LLM provider: fake | claude | codex | opencode-go.
292        #[arg(long, default_value = "fake")]
293        provider: String,
294        #[arg(long)]
295        iterations: Option<u32>,
296        #[arg(long = "max-actions")]
297        max_actions: Option<u32>,
298        #[arg(long = "dry-run")]
299        dry_run: bool,
        // Split into individual turns by `split_fake_responses` in `dispatch()`.
300        /// (fake provider only) semicolon-separated JSON responses to replay.
301        #[arg(long = "fake-responses")]
302        fake_responses: Option<String>,
303    },
304    /// Inspect the per-session wiki (v3).
305    Wiki {
306        #[command(subcommand)]
307        sub: WikiCmd,
308    },
309    /// Show or edit the per-session SCHEMA.md (v3).
310    Schema {
311        #[command(subcommand)]
312        sub: SchemaCmd,
313    },
    // Intercepted in `run()`, which prints the help text; `dispatch()` treats
    // reaching this variant as unreachable.
314    /// Show help (alias of --help).
315    Help,
316}
317
// Nested subcommands of `research schema`. In both variants the session is
// selected with an optional `--slug` flag (not a positional), which
// `dispatch()` forwards to `commands::schema::run_show` / `run_edit`.
// The `///` lines double as clap help text.
318#[derive(Subcommand, Debug)]
319pub enum SchemaCmd {
320    /// Print the session's SCHEMA.md.
321    Show {
322        #[arg(long)]
323        slug: Option<String>,
324    },
325    /// Open `$EDITOR` on the session's SCHEMA.md; logs `SchemaUpdated`
326    /// on change so the loop re-reads it next iteration.
327    Edit {
328        #[arg(long)]
329        slug: Option<String>,
330    },
331}
332
// Nested subcommands of `research wiki`. Throughout this enum the session is
// chosen with an optional `--slug` flag, while `page` / `question` are
// positional arguments. `dispatch()` routes the variants to
// `commands::wiki`, `commands::wiki_query` and `commands::wiki_lint`.
// The `///` lines double as clap help text.
333#[derive(Subcommand, Debug)]
334pub enum WikiCmd {
335    /// List every wiki page in a session with slug, bytes, frontmatter kind.
336    List {
337        #[arg(long)]
338        slug: Option<String>,
339    },
340    /// Print one wiki page to stdout.
341    Show {
342        /// The page slug (filename without `.md`).
343        page: String,
344        #[arg(long)]
345        slug: Option<String>,
346    },
347    /// Remove a wiki page. Dry-run unless `--force` is passed.
348    Rm {
349        /// The page slug to remove.
350        page: String,
351        #[arg(long)]
352        slug: Option<String>,
353        #[arg(long)]
354        force: bool,
355    },
356    /// Ask a question over the session's wiki; optionally save answer
357    /// as a `kind: analysis` page via `--save-as <slug>`.
358    Query {
359        /// The question to ask.
360        question: String,
361        #[arg(long)]
362        slug: Option<String>,
363        /// Save the answer as `wiki/<slug>.md` with `kind: analysis`.
364        #[arg(long = "save-as")]
365        save_as: Option<String>,
        // Optional and unvalidated here; the "prose" default is presumably
        // chosen by the handler when this is `None` — confirm.
366        /// Answer shape: prose (default) | comparison | table.
367        #[arg(long)]
368        format: Option<String>,
        // Defaults to "claude" here, whereas `Commands::Loop` defaults its
        // provider to "fake".
369        /// LLM provider: fake | claude | codex | opencode-go.
370        #[arg(long, default_value = "claude")]
371        provider: String,
372    },
373    /// Health check over the wiki (orphans, broken links, stale pages,
374    /// missing crossrefs, kind conflicts). Never blocks coverage.
375    Lint {
376        #[arg(long)]
377        slug: Option<String>,
        // The "Default 7" is not set via `default_value` here — presumably
        // applied by `commands::wiki_lint` when this is `None`; confirm.
378        /// Flag pages whose `updated:` frontmatter is older than this
379        /// many days. Default 7.
380        #[arg(long = "stale-days")]
381        stale_days: Option<i64>,
382    },
383}
384
/// Clap value parser for `--frame-id`.
///
/// The value is parsed as a signed integer first so that a negative input
/// produces the dedicated "must be >= 0" message rather than a generic parse
/// failure (spec § acceptance criteria / `add_cli_rejects_negative_frame_id`).
/// `allow_hyphen_values` on the field lets `-1` reach this parser instead of
/// being mis-interpreted as another flag.
///
/// # Errors
/// - input that is not an integer (including the empty string) →
///   "'--frame-id' value '…' is not a valid integer (frame-id must be >= 0)"
/// - any negative integer, even one below `i64::MIN` →
///   "frame-id must be >= 0 (got …)"
/// - any integer above `u32::MAX`, even one above `i64::MAX` →
///   "frame-id too large: …"
fn parse_frame_id(s: &str) -> Result<u32, String> {
    let v: i64 = s.parse().map_err(|e: std::num::ParseIntError| match e.kind() {
        // A well-formed integer that merely does not fit in i64 is still a
        // range problem, not a syntax problem: report it with the same
        // wording as the in-range checks below.
        std::num::IntErrorKind::NegOverflow => format!("frame-id must be >= 0 (got {s})"),
        std::num::IntErrorKind::PosOverflow => format!("frame-id too large: {s}"),
        _ => {
            format!("'--frame-id' value '{s}' is not a valid integer (frame-id must be >= 0)")
        }
    })?;
    if v < 0 {
        return Err(format!("frame-id must be >= 0 (got {v})"));
    }
    u32::try_from(v).map_err(|_| format!("frame-id too large: {v}"))
}
399
400/// Clap value parser for `--run-code-args`. The CLI layer enforces
401/// "must parse as a JSON array" (spec § 验收标准 — two distinct error
402/// shapes:malformed JSON vs valid-but-non-array JSON). Returns the
403/// original string on success; the JSON value is re-parsed downstream in
404/// the command handler when we hand it to `fetch::execute`.
405fn parse_run_code_args(s: &str) -> Result<String, String> {
406    let v: serde_json::Value = serde_json::from_str(s)
407        .map_err(|e| format!("invalid JSON for --run-code-args: {e} (expected JSON array)"))?;
408    if !v.is_array() {
409        return Err(format!(
410            "--run-code-args must be a JSON array (got {})",
411            json_type_name(&v)
412        ));
413    }
414    Ok(s.to_string())
415}
416
417fn json_type_name(v: &serde_json::Value) -> &'static str {
418    match v {
419        serde_json::Value::Null => "null",
420        serde_json::Value::Bool(_) => "boolean",
421        serde_json::Value::Number(_) => "number",
422        serde_json::Value::String(_) => "string",
423        serde_json::Value::Array(_) => "array",
424        serde_json::Value::Object(_) => "object",
425    }
426}
427
428/// Entry point used by `main.rs`. Returns the process exit code.
429pub fn run() -> ExitCode {
430    let cli = Cli::parse();
431    let json = cli.json;
432
433    let (envelope, github_audit_plain) = match cli.command {
434        None => {
435            // bare `research` with no subcommand: print help via clap and exit 0
436            use clap::CommandFactory;
437            let mut cmd = Cli::command();
438            let _ = cmd.print_help();
439            println!();
440            return ExitCode::SUCCESS;
441        }
442        Some(Commands::Help) => {
443            use clap::CommandFactory;
444            let mut cmd = Cli::command();
445            let _ = cmd.print_help();
446            println!();
447            return ExitCode::SUCCESS;
448        }
449        Some(cmd) => {
450            let github_audit_plain = matches!(cmd, Commands::GithubAudit { .. });
451            (dispatch(cmd), github_audit_plain)
452        }
453    };
454
455    if github_audit_plain && !json {
456        commands::github_audit::render_plain_summary(&envelope);
457    } else {
458        envelope.render(json);
459    }
460    if envelope.ok {
461        ExitCode::SUCCESS
462    } else {
463        // 64 = EX_USAGE per sysexits.h; keep single non-zero code for MVP
464        ExitCode::from(64)
465    }
466}
467
468fn dispatch(cmd: Commands) -> Envelope {
469    match cmd {
470        Commands::New {
471            topic,
472            preset,
473            slug,
474            force,
475            from,
476            tag,
477        } => commands::new::run(
478            &topic,
479            preset.as_deref(),
480            slug.as_deref(),
481            force,
482            from.as_deref(),
483            &tag,
484        ),
485        Commands::List { tag, tree } => commands::list::run(tag.as_deref(), tree),
486        Commands::Show { slug } => commands::show::run(&slug),
487        Commands::Status { slug } => commands::status::run(slug.as_deref()),
488        Commands::Audit { slug } => commands::audit::run(slug.as_deref()),
489        Commands::GithubAudit {
490            repo,
491            depth,
492            sample,
493            out,
494            html,
495        } => commands::github_audit::run(&repo, &depth, sample, out.as_deref(), html.as_deref()),
496        Commands::Resume { slug } => commands::resume::run(&slug),
497        Commands::Add {
498            url,
499            slug,
500            timeout,
501            readable,
502            no_readable,
503            min_bytes,
504            on_short_body,
505            frame_id,
506            run_code_args,
507            reseed,
508        } => commands::add::run(
509            &url,
510            slug.as_deref(),
511            timeout,
512            readable,
513            no_readable,
514            min_bytes,
515            on_short_body.as_deref(),
516            frame_id,
517            run_code_args.as_deref(),
518            reseed,
519        ),
520        Commands::AddLocal {
521            path,
522            slug,
523            glob,
524            max_file_bytes,
525            max_total_bytes,
526            original_url,
527            origin_tool,
528            origin_note,
529        } => commands::add_local::run(
530            &path,
531            slug.as_deref(),
532            &glob,
533            max_file_bytes,
534            max_total_bytes,
535            original_url.as_deref(),
536            origin_tool.as_deref(),
537            origin_note.as_deref(),
538        ),
539        Commands::Sources { slug, rejected } => commands::sources::run(slug.as_deref(), rejected),
540        Commands::Batch {
541            urls,
542            slug,
543            concurrency,
544            timeout,
545            readable,
546            no_readable,
547            min_bytes,
548            on_short_body,
549            frame_id,
550            run_code_args,
551            reseed,
552        } => commands::batch::run(
553            &urls,
554            slug.as_deref(),
555            concurrency,
556            timeout,
557            readable,
558            no_readable,
559            min_bytes,
560            on_short_body.as_deref(),
561            frame_id,
562            run_code_args.as_deref(),
563            reseed,
564        ),
565        Commands::Synthesize {
566            slug,
567            no_render,
568            open,
569            bilingual,
570            pdf,
571            pdf_output,
572        } => commands::synthesize::run(
573            slug.as_deref(),
574            no_render,
575            open,
576            bilingual,
577            pdf || pdf_output.is_some(),
578            pdf_output.as_deref(),
579        ),
580        Commands::Finish {
581            slug,
582            open,
583            bilingual,
584        } => commands::finish::run(&slug, open, bilingual),
585        Commands::Report {
586            slug,
587            format,
588            open,
589            no_open,
590            stdout,
591            output,
592        } => commands::report::run(
593            slug.as_deref(),
594            &format,
595            open,
596            no_open,
597            stdout,
598            output.as_deref(),
599        ),
600        Commands::Close { slug } => commands::close::run(slug.as_deref()),
601        Commands::Rm { slug, force } => commands::rm::run(&slug, force),
602        Commands::Route {
603            url,
604            prefer,
605            rules,
606            preset,
607        } => commands::route::run(&url, prefer.as_deref(), rules.as_deref(), preset.as_deref()),
608        Commands::Series { tag, open } => commands::series::run(&tag, open),
609        Commands::Diff { slug, unused_only } => commands::diff::run(slug.as_deref(), unused_only),
610        Commands::Coverage { slug } => commands::coverage::run(slug.as_deref()),
611        Commands::Doctor {
612            provider_smoke,
613            tool_smoke,
614            provider,
615        } => commands::doctor::run(provider_smoke, tool_smoke, &provider),
616        #[cfg(feature = "autoresearch")]
617        Commands::Loop {
618            slug,
619            provider,
620            iterations,
621            max_actions,
622            dry_run,
623            fake_responses,
624        } => commands::loop_cmd::run(
625            slug.as_deref(),
626            &provider,
627            iterations,
628            max_actions,
629            dry_run,
630            fake_responses.as_deref().map(split_fake_responses),
631        ),
632        Commands::Wiki { sub } => match sub {
633            WikiCmd::List { slug } => commands::wiki::run_list(slug.as_deref()),
634            WikiCmd::Show { page, slug } => commands::wiki::run_show(&page, slug.as_deref()),
635            WikiCmd::Rm { page, slug, force } => {
636                commands::wiki::run_rm(&page, slug.as_deref(), force)
637            }
638            WikiCmd::Query {
639                question,
640                slug,
641                save_as,
642                format,
643                provider,
644            } => commands::wiki_query::run(
645                &question,
646                slug.as_deref(),
647                save_as.as_deref(),
648                format.as_deref(),
649                &provider,
650            ),
651            WikiCmd::Lint { slug, stale_days } => {
652                commands::wiki_lint::run(slug.as_deref(), stale_days)
653            }
654        },
655        Commands::Schema { sub } => match sub {
656            SchemaCmd::Show { slug } => commands::schema::run_show(slug.as_deref()),
657            SchemaCmd::Edit { slug } => commands::schema::run_edit(slug.as_deref()),
658        },
659        Commands::Help => unreachable!("Help handled in run()"),
660    }
661}
662
/// Breaks the raw `--fake-responses` value into one string per replayed turn.
///
/// Two delimiters are understood:
/// - `;` — the separator the `--help` text advertises, convenient when
///   typing a quick debug replay by hand.
/// - ASCII Record Separator (`\u{1e}`) — the original wire format, used by
///   integration tests that pipe several JSON payloads.
///
/// Exactly one delimiter is used per call, and the semicolon has priority:
/// if `raw` contains a `;` anywhere (including inside a JSON string) the
/// input is split on `;` only and any record separators stay embedded in the
/// pieces. Only when no `;` is present is the record separator used.
///
/// Pieces are returned verbatim (no trimming, empty pieces are kept), so an
/// input without any delimiter yields a single-element vector.
#[cfg(any(feature = "autoresearch", test))]
fn split_fake_responses(raw: &str) -> Vec<String> {
    const RECORD_SEPARATOR: char = '\u{1e}';

    let pieces: Vec<&str> = if raw.contains(';') {
        raw.split(';').collect()
    } else {
        raw.split(RECORD_SEPARATOR).collect()
    };
    pieces.into_iter().map(String::from).collect()
}
681
/// Unit tests pinning the delimiter rules of `split_fake_responses`.
#[cfg(test)]
mod split_fake_tests {
    use super::split_fake_responses;

    /// Splits `raw` and checks the turns against `expected`.
    fn assert_split(raw: &str, expected: &[&str]) {
        assert_eq!(split_fake_responses(raw), expected);
    }

    #[test]
    fn splits_on_semicolon_when_present() {
        assert_split("resp1;resp2;resp3", &["resp1", "resp2", "resp3"]);
    }

    #[test]
    fn falls_back_to_record_separator() {
        assert_split("a\u{1e}b\u{1e}c", &["a", "b", "c"]);
    }

    #[test]
    fn single_payload_yields_one_element() {
        assert_split("just-one", &["just-one"]);
    }

    #[test]
    fn semicolon_wins_over_record_separator_if_both_present() {
        // A record separator inside a JSON payload is vanishingly unlikely;
        // this only pins the precedence the helper's docstring promises.
        assert_split("a;b\u{1e}c", &["a", "b\u{1e}c"]);
    }
}
research/cli.rs

research/
cli.rs