datalab-cli 0.1.0

A powerful CLI for converting, extracting, and processing documents using the Datalab API
Documentation
use clap::{CommandFactory, Parser, Subcommand};
use clap_mangen::Man;
use std::fs;
use std::path::PathBuf;

// Re-define the CLI structure for man page generation
// This is necessary because the main CLI uses runtime modules

/// Long description attached to the top-level command via `long_about` on `Cli`.
///
/// A trailing `\` inside the literal joins the following source line onto the
/// current one, so the wrapped sentences form single paragraphs; the blank
/// lines and the indented "GETTING STARTED" steps are kept verbatim.
const LONG_ABOUT: &str = "\
A powerful command-line interface for converting, extracting, and processing \
documents using the Datalab API.

All commands output JSON to stdout for easy integration with scripts and agents. \
Progress events are streamed to stderr as JSON when running interactively.

GETTING STARTED:
  1. Get your API key from https://www.datalab.to/app/keys
  2. Export it: export DATALAB_API_KEY=\"your-key\"
  3. Convert a document: datalab convert document.pdf

For detailed documentation, visit: https://documentation.datalab.to/";

/// Trailing help text attached to the top-level command via `after_help` on `Cli`.
///
/// Lists the environment variables the text describes and a few example
/// invocations. The leading `\` only suppresses the newline right after the
/// opening quote; every other line break is part of the string.
const AFTER_HELP: &str = "\
ENVIRONMENT VARIABLES:
  DATALAB_API_KEY     API key for authentication (required)
  DATALAB_BASE_URL    Custom API endpoint for on-premises deployments
  NO_COLOR            Disable colored output when set

EXAMPLES:
  datalab convert invoice.pdf
  datalab extract contract.pdf --schema schema.json
  datalab fill form.pdf --fields '{\"name\": \"John\"}'

For more examples, see: https://documentation.datalab.to/docs/welcome/quickstart";

// Top-level argument definition for the `datalab` command.
//
// In this file the struct is never parsed from real arguments; `main` only
// calls `Cli::command()` to obtain the `clap::Command` tree that the man
// pages are rendered from.
//
// Regular `//` comments are used here on purpose: with clap's derive, `///`
// doc comments on the items below are turned into help text and would end up
// in the generated pages.
//
// - `version` is filled in at compile time from `CARGO_PKG_VERSION`.
// - `long_about` / `after_help` reference the `LONG_ABOUT` and `AFTER_HELP`
//   constants of this file.
// - `quiet` and `verbose` are declared `global = true`, i.e. they are meant
//   to be accepted on subcommands as well as on the top-level command.
#[derive(Parser)]
#[command(
    name = "datalab",
    author = "Datalab <support@datalab.to>",
    version = env!("CARGO_PKG_VERSION"),
    about = "CLI for the Datalab document processing API",
    long_about = LONG_ABOUT,
    after_help = AFTER_HELP,
)]
struct Cli {
    /// Suppress all progress output
    #[arg(short, long, global = true)]
    quiet: bool,

    /// Enable verbose progress output (even when piped)
    #[arg(short, long, global = true)]
    verbose: bool,

    // The subcommand tree; each variant of `Commands` becomes one subcommand.
    #[command(subcommand)]
    command: Commands,
}

// First-level subcommands of `datalab`.
//
// Each variant is one subcommand; the `///` line above a variant or field is
// the help text clap shows for it (and therefore what appears in the man
// page), so those lines are runtime text rather than ordinary comments.
//
// Fields without an `#[arg(...)]` attribute are positional arguments; fields
// marked `#[arg(long)]` are `--name value` options. Variant and field names
// are converted to kebab-case by clap's default naming
// (e.g. `TrackChanges` -> `track-changes`, `output_format` -> `--output-format`).
#[derive(Subcommand)]
enum Commands {
    /// Convert a document to markdown, HTML, JSON, or chunks
    Convert {
        /// File path or URL to convert
        input: String,
        /// Output format
        #[arg(long, default_value = "markdown")]
        output_format: String,
    },
    /// Extract structured data from a document using a JSON schema
    Extract {
        /// File path or URL
        input: String,
        /// JSON schema
        #[arg(long)]
        schema: String,
    },
    /// Segment a document into logical sections
    Segment {
        /// File path or URL
        input: String,
        /// Segmentation schema
        #[arg(long)]
        schema: String,
    },
    /// Fill a form with provided field data
    Fill {
        /// File path or URL
        input: String,
        /// Field data JSON
        #[arg(long)]
        fields: String,
    },
    /// Extract track changes from a Word document
    TrackChanges {
        /// DOCX file path or URL
        input: String,
    },
    /// Create a document from markdown
    CreateDocument {
        /// Markdown content
        #[arg(long)]
        markdown: String,
    },
    /// Score extraction results
    ExtractScore {
        /// Checkpoint ID
        #[arg(long)]
        checkpoint_id: String,
    },
    // The three variants below only wrap a nested subcommand enum.
    /// File management operations
    Files {
        #[command(subcommand)]
        command: FilesCommand,
    },
    /// Workflow management operations
    Workflows {
        #[command(subcommand)]
        command: WorkflowsCommand,
    },
    /// Cache management operations
    Cache {
        #[command(subcommand)]
        command: CacheCommand,
    },
}

// Nested subcommands under `datalab files`.
//
// All fields are positional arguments (no `#[arg(...)]` attribute). They carry
// no `///` help text of their own, so only the variant-level description is
// available for the generated help.
#[derive(Subcommand)]
enum FilesCommand {
    /// Upload a file
    Upload { file: PathBuf },
    /// List files
    List,
    /// Get file metadata
    Get { file_id: String },
    /// Download a file
    Download { file_id: String },
    /// Delete a file
    Delete { file_id: String },
}

// Nested subcommands under `datalab workflows`.
//
// Fields marked `#[arg(long)]` are `--name value` options; the remaining
// fields (`workflow_id`, `execution_id`) are positional. None of the fields
// has `///` help text, so only the variant descriptions are documented.
#[derive(Subcommand)]
enum WorkflowsCommand {
    /// Create a workflow
    Create {
        #[arg(long)]
        name: String,
        // Typed as a path; presumably a file describing the steps — the
        // expected format is not visible in this file.
        #[arg(long)]
        steps: PathBuf,
    },
    /// List workflows
    List,
    /// Get workflow
    Get { workflow_id: String },
    /// Execute workflow
    Execute {
        workflow_id: String,
        #[arg(long)]
        input: PathBuf,
    },
    /// Get execution status
    Execution { execution_id: String },
    /// Delete workflow
    Delete { workflow_id: String },
    /// List step types
    StepTypes,
}

// Nested subcommands under `datalab cache`.
#[derive(Subcommand)]
enum CacheCommand {
    /// Clear cache
    Clear {
        // Optional `--older-than <N>` option (the field is an `Option`).
        // NOTE(review): the unit of the number is not stated anywhere in this
        // file — confirm against the real CLI before documenting it.
        #[arg(long)]
        older_than: Option<u64>,
    },
    /// Show cache stats
    Stats,
}

fn main() -> std::io::Result<()> {
    // Only generate man pages in release builds or when explicitly requested
    let out_dir = PathBuf::from(std::env::var_os("OUT_DIR").unwrap_or_else(|| "target/man".into()));

    let man_dir = out_dir.join("man");
    fs::create_dir_all(&man_dir)?;

    let cmd = Cli::command();

    // Generate main man page
    let man = Man::new(cmd.clone());
    let mut buffer = Vec::new();
    man.render(&mut buffer)?;
    fs::write(man_dir.join("datalab.1"), buffer)?;

    // Generate man pages for subcommands
    for subcommand in cmd.get_subcommands() {
        let name = subcommand.get_name();
        let man = Man::new(subcommand.clone());
        let mut buffer = Vec::new();
        man.render(&mut buffer)?;
        fs::write(man_dir.join(format!("datalab-{}.1", name)), buffer)?;
    }

    println!("cargo:rerun-if-changed=build.rs");
    println!("cargo:rerun-if-changed=src/main.rs");

    Ok(())
}