Skip to main content

stygian_graph/application/
cli.rs

1//! Command-line interface for stygian
2//!
3//! Provides the `stygian` binary with subcommands for running, validating,
4//! and visualising scraping pipelines.
5//!
6//! # Example
7//!
8//! ```text
9//! stygian run pipeline.toml
10//! stygian check pipeline.toml
11//! stygian list-services
12//! stygian list-providers
13//! stygian graph-viz pipeline.toml --format mermaid
14//! ```
15
16use std::time::Duration;
17
18use clap::{Parser, Subcommand, ValueEnum};
19use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
20use tracing::{error, info};
21
22use crate::application::pipeline_parser::{PipelineParser, PipelineWatcher};
23use crate::application::registry::global_registry;
24
25// ─── Clap structs ─────────────────────────────────────────────────────────────
26
/// Stygian — high-performance graph-based scraping engine
// Top-level argument schema for the `stygian` binary, generated by clap's
// `Parser` derive. The `///` comments here are input to that derive, so they
// are treated as user-facing text rather than ordinary documentation.
//
// `version` is given without a value so clap supplies it
// (NOTE(review): presumably from the crate's Cargo metadata — confirm), and
// `propagate_version = true` asks clap to expose it on sub-commands as well.
#[derive(Parser, Debug)]
#[command(
    name = "stygian",
    about = "High-performance graph-based scraping engine",
    version,
    propagate_version = true
)]
pub struct Cli {
    /// The sub-command to execute
    // Not wrapped in `Option`, so a sub-command must always be supplied.
    #[command(subcommand)]
    pub command: Commands,
}
40
/// Available sub-commands for the stygian CLI
// NOTE: the `///` comments in this enum are consumed by clap's derive
// (presumably surfacing as `--help` text), so edit them as user-facing
// strings. Variant names are exposed in kebab-case on the command line
// (`list-services`, `list-providers`, `graph-viz`).
#[derive(Subcommand, Debug)]
pub enum Commands {
    /// Load and execute a pipeline from a TOML file
    Run {
        /// Path to the pipeline TOML file
        // Positional argument, e.g. `stygian run pipeline.toml`.
        file: String,
        /// Re-run the pipeline whenever the file changes on disk
        // Boolean switch: passing `--watch` sets it to `true`.
        #[arg(long)]
        watch: bool,
        /// Polling interval for watch mode (seconds)
        // Only consulted when `watch` is set; the default literal "5" is
        // parsed into the `u64` by clap.
        #[arg(long, default_value = "5")]
        watch_interval: u64,
    },
    /// Validate a pipeline TOML file without executing it
    Check {
        /// Path to the pipeline TOML file
        // Positional argument, e.g. `stygian check pipeline.toml`.
        file: String,
    },
    /// List all registered scraping services with health status
    // Takes no arguments; reads the process-wide service registry.
    ListServices,
    /// List all available AI providers and their capabilities
    // Takes no arguments; prints a table that is hard-coded in this module.
    ListProviders,
    /// Generate a visualisation of the pipeline DAG
    GraphViz {
        /// Path to the pipeline TOML file
        // Positional argument, e.g. `stygian graph-viz pipeline.toml`.
        file: String,
        /// Output format: dot (Graphviz) or mermaid
        // Parsed through `VizFormat`'s `ValueEnum` impl; falls back to `dot`
        // when `--format` is omitted.
        #[arg(long, default_value = "dot")]
        format: VizFormat,
    },
}
73
/// Visualisation output format
// Accepted on the command line as `--format dot` or `--format mermaid`
// (clap's `ValueEnum` derive maps the variants to those lowercase values).
#[derive(Clone, Debug, ValueEnum)]
pub enum VizFormat {
    /// Graphviz DOT language
    // Rendered via the pipeline definition's `to_dot()`.
    Dot,
    /// Mermaid flowchart
    // Rendered via the pipeline definition's `to_mermaid()`.
    Mermaid,
}
82
83// ─── Entry point ─────────────────────────────────────────────────────────────
84
85/// CLI entry point.
86///
87/// Initialises tracing (honouring `RUST_LOG`; defaults to `info`) and
88/// dispatches the requested sub-command.
89///
90/// # Example
91///
92/// ```rust,no_run
93/// use stygian_graph::application::cli::run_cli;
94///
95/// #[tokio::main]
96/// async fn main() -> anyhow::Result<()> {
97///     run_cli().await
98/// }
99/// ```
100///
101/// # Errors
102///
103/// Returns `anyhow::Error` when the CLI encounters an invalid subcommand, a
104/// config file cannot be read or parsed, or the underlying pipeline execution
105/// fails. The `anyhow` wrapper is used here because CLI entry points are the
106/// only place in the workspace that may use `anyhow`.
107pub async fn run_cli() -> anyhow::Result<()> {
108    // Initialise tracing with RUST_LOG defaulting to "info"
109    let filter = std::env::var("RUST_LOG").unwrap_or_else(|_| "info".to_string());
110    let _ = tracing_subscriber::fmt()
111        .with_env_filter(filter)
112        .with_target(false)
113        .compact()
114        .try_init();
115
116    let cli = Cli::parse();
117
118    match cli.command {
119        Commands::Run {
120            file,
121            watch,
122            watch_interval,
123        } => cmd_run(&file, watch, watch_interval).await,
124        Commands::Check { file } => cmd_check(&file),
125        Commands::ListServices => {
126            cmd_list_services();
127            Ok(())
128        }
129        Commands::ListProviders => {
130            cmd_list_providers();
131            Ok(())
132        }
133        Commands::GraphViz { file, format } => cmd_graph_viz(&file, format),
134    }
135}
136
137// ─── run ─────────────────────────────────────────────────────────────────────
138
139async fn cmd_run(file: &str, watch: bool, watch_interval: u64) -> anyhow::Result<()> {
140    if watch {
141        info!("Watch mode enabled — polling every {watch_interval}s");
142        run_pipeline_once(file).await?;
143
144        let path = file.to_string();
145        let handle = PipelineWatcher::new(file)
146            .with_interval(Duration::from_secs(watch_interval))
147            .watch(move |def| {
148                info!(
149                    "Pipeline reloaded ({} nodes, {} services)",
150                    def.nodes.len(),
151                    def.services.len()
152                );
153                let path2 = path.clone();
154                tokio::spawn(async move {
155                    if let Err(e) = run_pipeline_once(&path2).await {
156                        error!("Pipeline run failed: {e}");
157                    }
158                });
159            });
160
161        // Block until Ctrl-C
162        tokio::signal::ctrl_c().await?;
163        handle.abort();
164    } else {
165        run_pipeline_once(file).await?;
166    }
167    Ok(())
168}
169
170async fn run_pipeline_once(file: &str) -> anyhow::Result<()> {
171    info!(file, "Loading pipeline");
172
173    let def = PipelineParser::from_figment_file(file)
174        .map_err(|e| anyhow::anyhow!("Failed to load pipeline: {e}"))?;
175
176    def.validate()
177        .map_err(|e| anyhow::anyhow!("Pipeline validation failed: {e}"))?;
178
179    let order = def
180        .topological_order()
181        .map_err(|e| anyhow::anyhow!("Topological sort failed: {e}"))?;
182
183    info!(
184        nodes = order.len(),
185        services = def.services.len(),
186        "Pipeline loaded successfully"
187    );
188
189    // Build progress bars
190    let mp = MultiProgress::new();
191    let style =
192        ProgressStyle::with_template("{spinner:.cyan} [{bar:40.cyan/blue}] {pos}/{len} {msg}")?
193            .progress_chars("=>-");
194
195    let overall = mp.add(ProgressBar::new(order.len() as u64));
196    overall.set_style(style.clone());
197    overall.set_message("executing pipeline");
198
199    for node_name in &order {
200        let node = def
201            .nodes
202            .iter()
203            .find(|n| &n.name == node_name)
204            .ok_or_else(|| {
205                anyhow::anyhow!("BUG: node '{node_name}' from topological_order not found in nodes")
206            })?;
207
208        let bar = mp.add(ProgressBar::new(3));
209        bar.set_style(ProgressStyle::with_template("  {spinner:.green} {msg}")?);
210        bar.set_message(format!(
211            "[{}] {} ({})",
212            node_name,
213            node.service,
214            node.url.as_deref().unwrap_or("-")
215        ));
216        bar.enable_steady_tick(Duration::from_millis(120));
217
218        // Simulate node execution stages: fetch → process → complete
219        tokio::time::sleep(Duration::from_millis(50)).await;
220        bar.inc(1);
221        tokio::time::sleep(Duration::from_millis(50)).await;
222        bar.inc(1);
223        tokio::time::sleep(Duration::from_millis(50)).await;
224        bar.inc(1);
225
226        bar.finish_with_message(format!("✓ {node_name}"));
227        overall.inc(1);
228    }
229
230    overall.finish_with_message("pipeline complete");
231    info!(file, "Pipeline execution finished");
232    Ok(())
233}
234
235// ─── check ────────────────────────────────────────────────────────────────────
236
237fn cmd_check(file: &str) -> anyhow::Result<()> {
238    println!("Checking pipeline: {file}");
239
240    let def =
241        PipelineParser::from_figment_file(file).map_err(|e| anyhow::anyhow!("Parse error: {e}"))?;
242
243    println!(
244        "  {} nodes, {} services declared",
245        def.nodes.len(),
246        def.services.len()
247    );
248
249    def.validate()
250        .map_err(|e| anyhow::anyhow!("Validation failed: {e}"))?;
251    let order = def
252        .topological_order()
253        .map_err(|e| anyhow::anyhow!("Topological sort failed: {e}"))?;
254    println!("  ✓ Validation passed");
255    println!("  Execution order: {}", order.join(" → "));
256
257    Ok(())
258}
259
260// ─── list-services ────────────────────────────────────────────────────────────
261
262fn cmd_list_services() {
263    let registry = global_registry();
264    let names = registry.names();
265
266    if names.is_empty() {
267        println!("No services registered.");
268        println!("Tip: services are populated at program startup via ServiceRegistry::register().");
269        return;
270    }
271
272    println!("{:<24} STATUS", "SERVICE");
273    println!("{}", "-".repeat(40));
274
275    for name in &names {
276        let status = registry
277            .status(name)
278            .unwrap_or(crate::application::registry::ServiceStatus::Unknown);
279        let status_str = match &status {
280            crate::application::registry::ServiceStatus::Healthy => "healthy".to_string(),
281            crate::application::registry::ServiceStatus::Degraded(msg) => {
282                format!("degraded ({msg})")
283            }
284            crate::application::registry::ServiceStatus::Unavailable(msg) => {
285                format!("unavailable ({msg})")
286            }
287            crate::application::registry::ServiceStatus::Unknown => "unknown".to_string(),
288        };
289        println!("{name:<24} {status_str}");
290    }
291}
292
293// ─── list-providers ───────────────────────────────────────────────────────────
294
/// Static descriptor for a known AI provider
///
/// One value corresponds to one row of the table printed by
/// `cmd_list_providers`; all fields are display-only.
// Four independent capability flags read more clearly as plain bools than as
// a bitmask, hence the lint exemption.
#[allow(clippy::struct_excessive_bools)]
struct ProviderInfo {
    /// Human-readable provider label, e.g. `"claude (Anthropic)"`.
    name: &'static str,
    /// Free-form, comma-separated list of example model names.
    models: &'static str,
    /// Shown in the `STREAM` column.
    streaming: bool,
    /// Shown in the `VISION` column.
    vision: bool,
    /// Shown in the `TOOL_USE` column.
    tool_use: bool,
    /// Shown in the `JSON_MODE` column.
    json_mode: bool,
}
305
306fn cmd_list_providers() {
307    const fn flag(b: bool) -> &'static str {
308        if b { "✓" } else { "✗" }
309    }
310
311    let providers = [
312        ProviderInfo {
313            name: "claude (Anthropic)",
314            models: "claude-sonnet-4-5, claude-3-5-sonnet",
315            streaming: true,
316            vision: true,
317            tool_use: true,
318            json_mode: true,
319        },
320        ProviderInfo {
321            name: "openai (ChatGPT)",
322            models: "gpt-4o, gpt-4-turbo, gpt-3.5-turbo",
323            streaming: true,
324            vision: true,
325            tool_use: true,
326            json_mode: true,
327        },
328        ProviderInfo {
329            name: "gemini (Google)",
330            models: "gemini-1.5-pro, gemini-1.5-flash",
331            streaming: true,
332            vision: true,
333            tool_use: true,
334            json_mode: true,
335        },
336        ProviderInfo {
337            name: "copilot (GitHub)",
338            models: "gpt-4o, claude-3.5-sonnet (via Copilot API)",
339            streaming: true,
340            vision: false,
341            tool_use: true,
342            json_mode: false,
343        },
344        ProviderInfo {
345            name: "ollama (Local)",
346            models: "llama3, mistral, phi3, codellama (any pulled model)",
347            streaming: true,
348            vision: false,
349            tool_use: false,
350            json_mode: true,
351        },
352    ];
353
354    println!(
355        "{:<28} {:<8} {:<8} {:<10} {:<10}  MODELS",
356        "PROVIDER", "STREAM", "VISION", "TOOL_USE", "JSON_MODE"
357    );
358    println!("{}", "-".repeat(90));
359
360    for p in &providers {
361        println!(
362            "{:<28} {:<8} {:<8} {:<10} {:<10}  {}",
363            p.name,
364            flag(p.streaming),
365            flag(p.vision),
366            flag(p.tool_use),
367            flag(p.json_mode),
368            p.models
369        );
370    }
371
372    println!();
373    println!("Configure via TOML [[services]] blocks or STYGIAN_* environment variables.");
374}
375
376// ─── graph-viz ────────────────────────────────────────────────────────────────
377
378#[allow(clippy::needless_pass_by_value)]
379fn cmd_graph_viz(file: &str, format: VizFormat) -> anyhow::Result<()> {
380    let def = PipelineParser::from_figment_file(file)
381        .map_err(|e| anyhow::anyhow!("Failed to load pipeline: {e}"))?;
382
383    def.validate()
384        .map_err(|e| anyhow::anyhow!("Pipeline validation failed: {e}"))?;
385
386    let output = match format {
387        VizFormat::Dot => def.to_dot(),
388        VizFormat::Mermaid => def.to_mermaid(),
389    };
390
391    println!("{output}");
392    Ok(())
393}
394
395// ─── Tests ────────────────────────────────────────────────────────────────────
396
// Unit tests for the clap schema and the synchronous sub-command handlers.
// `unwrap` is allowed here because a panic is the desired failure mode.
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use clap::CommandFactory;

    // ── Schema sanity ───────────────────────────────────────────────────────

    #[test]
    fn cli_definition_is_internally_consistent() {
        // Runs clap's own consistency checks over the derived command tree
        Cli::command().debug_assert();
    }

    #[test]
    fn cli_help_generates_without_panic() {
        // Verify the clap schema compiles and produces output
        let mut cmd = Cli::command();
        let _ = cmd.render_help();
    }

    // ── Argument parsing ────────────────────────────────────────────────────

    #[test]
    fn cli_parses_check_subcommand() {
        let cli = Cli::try_parse_from(["stygian", "check", "pipeline.toml"]).unwrap();
        assert!(matches!(cli.command, Commands::Check { file } if file == "pipeline.toml"));
    }

    #[test]
    fn cli_parses_list_services() {
        let cli = Cli::try_parse_from(["stygian", "list-services"]).unwrap();
        assert!(matches!(cli.command, Commands::ListServices));
    }

    #[test]
    fn cli_parses_list_providers() {
        let cli = Cli::try_parse_from(["stygian", "list-providers"]).unwrap();
        assert!(matches!(cli.command, Commands::ListProviders));
    }

    #[test]
    fn cli_parses_graph_viz_dot() {
        let cli = Cli::try_parse_from(["stygian", "graph-viz", "pipeline.toml", "--format", "dot"])
            .unwrap();
        assert!(matches!(
            cli.command,
            Commands::GraphViz {
                format: VizFormat::Dot,
                ..
            }
        ));
    }

    #[test]
    fn cli_parses_graph_viz_mermaid() {
        let cli = Cli::try_parse_from([
            "stygian",
            "graph-viz",
            "pipeline.toml",
            "--format",
            "mermaid",
        ])
        .unwrap();
        assert!(matches!(
            cli.command,
            Commands::GraphViz {
                format: VizFormat::Mermaid,
                ..
            }
        ));
    }

    #[test]
    fn cli_graph_viz_defaults_to_dot() {
        // `--format` is optional and declared with `default_value = "dot"`
        let cli = Cli::try_parse_from(["stygian", "graph-viz", "pipeline.toml"]).unwrap();
        assert!(matches!(
            cli.command,
            Commands::GraphViz {
                format: VizFormat::Dot,
                ..
            }
        ));
    }

    #[test]
    fn cli_rejects_unknown_viz_format() {
        let parsed =
            Cli::try_parse_from(["stygian", "graph-viz", "pipeline.toml", "--format", "svg"]);
        assert!(parsed.is_err());
    }

    #[test]
    fn cli_parses_run_with_watch() {
        let cli = Cli::try_parse_from(["stygian", "run", "pipeline.toml", "--watch"]).unwrap();
        assert!(matches!(cli.command, Commands::Run { watch: true, .. }));
    }

    #[test]
    fn cli_run_defaults_to_single_shot_with_five_second_interval() {
        let cli = Cli::try_parse_from(["stygian", "run", "pipeline.toml"]).unwrap();
        assert!(matches!(
            cli.command,
            Commands::Run {
                watch: false,
                watch_interval: 5,
                ..
            }
        ));
    }

    #[test]
    fn cli_parses_run_with_custom_watch_interval() {
        let cli = Cli::try_parse_from([
            "stygian",
            "run",
            "pipeline.toml",
            "--watch",
            "--watch-interval",
            "30",
        ])
        .unwrap();
        assert!(matches!(
            cli.command,
            Commands::Run {
                watch: true,
                watch_interval: 30,
                ..
            }
        ));
    }

    #[test]
    fn cli_requires_a_subcommand() {
        assert!(Cli::try_parse_from(["stygian"]).is_err());
    }

    // ── Handlers ────────────────────────────────────────────────────────────

    #[test]
    fn cmd_list_providers_succeeds() {
        cmd_list_providers();
    }

    #[test]
    fn cmd_list_services_succeeds_empty_registry() {
        // global registry is empty in tests — should succeed with a "no services" message
        cmd_list_services();
    }

    /// Helper: write a minimal valid pipeline TOML to a `NamedTempFile`
    fn minimal_pipeline_toml() -> tempfile::NamedTempFile {
        use std::io::Write as _;
        let mut tmp = tempfile::NamedTempFile::new().unwrap();
        writeln!(
            tmp,
            r#"
[[services]]
name = "http"
kind = "http"

[[nodes]]
name = "fetch"
service = "http"
url = "https://example.com"
"#
        )
        .unwrap();
        tmp
    }

    #[test]
    fn cmd_check_valid_toml_succeeds() {
        let tmp = minimal_pipeline_toml();
        cmd_check(tmp.path().to_str().unwrap()).unwrap();
    }

    #[test]
    fn cmd_graph_viz_dot_format_succeeds() {
        let tmp = minimal_pipeline_toml();
        cmd_graph_viz(tmp.path().to_str().unwrap(), VizFormat::Dot).unwrap();
    }

    #[test]
    fn cmd_graph_viz_mermaid_format_succeeds() {
        let tmp = minimal_pipeline_toml();
        cmd_graph_viz(tmp.path().to_str().unwrap(), VizFormat::Mermaid).unwrap();
    }
}
515}