// datalab-cli 0.1.0
//
// A powerful CLI for converting, extracting, and processing documents using the Datalab API.
// Documentation
use crate::cache::Cache;
use crate::client::DatalabClient;
use crate::error::{DatalabError, Result};
use crate::output::Progress;
use base64::Engine;
use clap::Args;
use reqwest::multipart::Form;
use serde_json::json;
use std::fs;
use std::path::PathBuf;

// Command-line arguments for the `create-document` command.
//
// Maintainer notes in this struct are written as plain `//` comments on purpose:
// clap's derive uses the `///` doc comments on the fields as user-facing help text,
// so editing those changes what `--help` prints.
#[derive(Args, Debug)]
pub struct CreateDocumentArgs {
    // Resolved by `get_markdown`: if the value names an existing path it is read
    // from disk, otherwise the value itself is used as the markdown text.
    /// Markdown content or path to markdown file
    #[arg(long, value_name = "MARKDOWN")]
    pub markdown: String,

    // Sent to the API as the `output_format` form field and included in the cache
    // parameters. No validation of the value is visible in this file.
    /// Output format (currently only docx supported)
    #[arg(
        long,
        default_value = "docx",
        value_name = "FORMAT",
        help_heading = "Output Options"
    )]
    pub output_format: String,

    // Only gates the cache *lookup* in `execute`; a fresh result is still written
    // to the cache afterwards.
    /// Skip local cache lookup
    #[arg(long, help_heading = "Cache Options")]
    pub skip_cache: bool,

    // When set and the result carries an `output_base64` string, `output_result`
    // decodes it and writes the bytes to this path. If that field is absent the
    // JSON result is printed and no file is written.
    /// Write created document to file (binary output)
    #[arg(long, short, value_name = "FILE", help_heading = "Output Options")]
    pub output: Option<PathBuf>,

    // Passed to `DatalabClient::new` as `Some(timeout)`.
    /// Request timeout in seconds
    #[arg(
        long,
        default_value = "300",
        value_name = "SECS",
        help_heading = "Advanced Options"
    )]
    pub timeout: u64,
}

impl CreateDocumentArgs {
    /// Resolves the `--markdown` argument into the markdown text to submit.
    ///
    /// If the argument names something that exists on the filesystem, it is read
    /// as a string and returned; otherwise the argument itself is returned.
    ///
    /// # Errors
    ///
    /// Propagates the I/O error from reading the path when it exists but cannot
    /// be read as a string.
    fn get_markdown(&self) -> Result<String> {
        let candidate = PathBuf::from(&self.markdown);
        if !candidate.exists() {
            // Nothing on disk by that name: the argument is the markdown itself.
            return Ok(self.markdown.clone());
        }

        let contents = fs::read_to_string(&candidate)?;
        Ok(contents)
    }

    /// Builds the JSON parameters that feed the cache key for this request.
    ///
    /// The markdown is represented by the hex-encoded SHA-256 of its bytes
    /// (`markdown_hash`) alongside the requested `output_format`.
    fn to_cache_params(&self, markdown: &str) -> serde_json::Value {
        use sha2::{Digest, Sha256};

        let digest = Sha256::digest(markdown.as_bytes());

        json!({
            "markdown_hash": hex::encode(digest),
            "output_format": self.output_format,
        })
    }
}

/// Runs the `create-document` command.
///
/// Steps, in order:
/// 1. Build the API client (with `args.timeout`) and the local cache.
/// 2. Resolve the markdown text from `args.markdown`.
/// 3. Unless `args.skip_cache` is set, look the request up in the cache and, on a
///    hit, emit the cached result and return.
/// 4. Otherwise submit a multipart form (`markdown`, `output_format`) through
///    `submit_and_poll`, store the result in the cache, and emit it.
///
/// # Errors
///
/// Returns an error if the client or cache cannot be created, the markdown cannot
/// be read, the API call fails, the result cannot be emitted, or the cache write
/// fails. A cache write failure is reported only *after* the result has been
/// emitted, so a completed API call is never discarded because of it.
pub async fn execute(args: CreateDocumentArgs, progress: &Progress) -> Result<()> {
    let client = DatalabClient::new(Some(args.timeout))?;
    let cache = Cache::new()?;

    let markdown = args.get_markdown()?;

    progress.start("create-document", None);

    let cache_params = args.to_cache_params(&markdown);
    let cache_key = Cache::generate_key(None, None, "create-document", &cache_params);

    if !args.skip_cache {
        if let Some(cached) = cache.get(&cache_key) {
            progress.cache_hit(&cache_key);
            return output_result(&cached, args.output.as_ref());
        }
    }

    let form = Form::new()
        .text("markdown", markdown)
        .text("output_format", args.output_format.clone());

    let result = client
        .submit_and_poll("create-document", form, progress)
        .await?;

    // Attempt the cache write first (same order as before) but hold on to its
    // outcome instead of returning immediately: bailing out here with `?` would
    // throw away a result the API has already produced.
    let cache_outcome = cache.set(&cache_key, &result, "create-document", None, None);

    output_result(&result, args.output.as_ref())?;

    // The result has been delivered; now surface any cache failure to the caller.
    cache_outcome?;

    Ok(())
}

/// Emits a result to the user.
///
/// When `output_file` is given and `result` has a string `output_base64` field,
/// that field is base64-decoded and written to the file, and the remaining JSON
/// (with `output_base64` removed) is pretty-printed to stdout. In every other
/// case the full `result` is pretty-printed to stdout and no file is written.
///
/// # Errors
///
/// Returns `DatalabError::InvalidInput` if the base64 payload cannot be decoded,
/// and propagates file-write and JSON-serialization errors.
fn output_result(result: &serde_json::Value, output_file: Option<&PathBuf>) -> Result<()> {
    let encoded = result.get("output_base64").and_then(|v| v.as_str());

    match (output_file, encoded) {
        (Some(path), Some(base64_data)) => {
            let bytes = base64::engine::general_purpose::STANDARD
                .decode(base64_data)
                .map_err(|e| DatalabError::InvalidInput(format!("Invalid base64: {}", e)))?;
            fs::write(path, &bytes)?;

            // The payload now lives in the file; print only the remaining fields.
            let mut summary = result.clone();
            if let Some(fields) = summary.as_object_mut() {
                fields.remove("output_base64");
            }
            println!("{}", serde_json::to_string_pretty(&summary)?);
        }
        _ => println!("{}", serde_json::to_string_pretty(result)?),
    }

    Ok(())
}