datalab-cli 0.1.0

A powerful CLI for converting, extracting, and processing documents using the Datalab API
Documentation
use crate::error::{DatalabError, Result};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::fs;
use std::path::PathBuf;

/// Sidecar metadata stored next to each cached response as
/// `<cache_key>.meta.json`.
///
/// Written by `Cache::set` and read back by `Cache::clear` to decide
/// whether an entry is old enough to be removed.
#[derive(Serialize, Deserialize)]
pub struct CacheMetadata {
    /// Time at which the entry was written (`Utc::now()` inside `Cache::set`).
    pub created_at: DateTime<Utc>,
    /// Endpoint string passed to `Cache::set` for this entry.
    pub endpoint: String,
    /// The cache key the entry was stored under. Despite the field name,
    /// `Cache::set` stores the full `cache_key` here.
    pub params_hash: String,
    /// Hash of the input file, when the caller supplied one.
    pub file_hash: Option<String>,
    /// Path of the input file, when the caller supplied one.
    pub file_path: Option<String>,
}

/// Disk-backed cache for API responses and binary payloads.
///
/// Entries live under `base_dir` in two subdirectories: `responses`
/// (JSON bodies plus `.meta.json` sidecars) and `files` (`.bin` blobs).
pub struct Cache {
    /// Root directory of the cache; `Cache::new` sets it to the `datalab`
    /// folder inside the platform cache directory.
    base_dir: PathBuf,
}

impl Cache {
    /// Opens the on-disk cache, creating its directory layout if needed.
    ///
    /// The cache root is the `datalab` folder inside the directory reported
    /// by `dirs::cache_dir()`. The `responses` and `files` subdirectories are
    /// created beneath it (along with any missing parents).
    ///
    /// # Errors
    ///
    /// Returns `DatalabError::CacheError` wrapping a `NotFound` I/O error when
    /// no cache directory can be determined, and propagates any I/O error
    /// raised while creating the subdirectories.
    pub fn new() -> Result<Self> {
        // Built lazily: only constructed when `dirs::cache_dir()` yields nothing.
        let no_cache_dir = || {
            DatalabError::CacheError(std::io::Error::new(
                std::io::ErrorKind::NotFound,
                "Could not find cache directory",
            ))
        };

        let base_dir = dirs::cache_dir()
            .ok_or_else(no_cache_dir)?
            .join("datalab");

        for subdir in ["responses", "files"] {
            fs::create_dir_all(base_dir.join(subdir))?;
        }

        Ok(Self { base_dir })
    }

    /// Derives the cache key for a request as a hex-encoded SHA-256 digest.
    ///
    /// The digest covers, in this order: `file_hash` (if present), `file_url`
    /// (if present), `endpoint`, and the JSON serialization of `params`.
    /// The pieces are hashed back-to-back with no separator between them.
    ///
    /// If `params` cannot be serialized, an empty string is hashed in its
    /// place.
    pub fn generate_key(
        file_hash: Option<&str>,
        file_url: Option<&str>,
        endpoint: &str,
        params: &serde_json::Value,
    ) -> String {
        let params_json = serde_json::to_string(params).unwrap_or_default();

        // Ordered list of inputs; absent optional parts contribute nothing.
        let parts = [
            file_hash,
            file_url,
            Some(endpoint),
            Some(params_json.as_str()),
        ];

        let mut digest = Sha256::new();
        for part in parts.iter().flatten() {
            digest.update(part.as_bytes());
        }

        hex::encode(digest.finalize())
    }

    pub fn hash_file(path: &PathBuf) -> Result<String> {
        let content = fs::read(path)?;
        let mut hasher = Sha256::new();
        hasher.update(&content);
        Ok(hex::encode(hasher.finalize()))
    }

    /// Looks up the cached response stored under `cache_key`.
    ///
    /// Reads `responses/<cache_key>.json` and parses it as JSON. Returns
    /// `None` when the file does not exist, cannot be read, or does not
    /// contain valid JSON; failures are never reported as errors.
    pub fn get(&self, cache_key: &str) -> Option<serde_json::Value> {
        let file_name = format!("{}.json", cache_key);
        let response_path = self.base_dir.join("responses").join(file_name);

        if !response_path.exists() {
            return None;
        }

        let raw = fs::read_to_string(&response_path).ok()?;
        serde_json::from_str(&raw).ok()
    }

    /// Stores `response` under `cache_key` together with a metadata sidecar.
    ///
    /// Two files are written into `responses`, in this order:
    /// `<cache_key>.json` (the pretty-printed response) and
    /// `<cache_key>.meta.json` (a pretty-printed `CacheMetadata` recording
    /// the current UTC time, `endpoint`, the cache key, and the optional
    /// `file_hash` / `file_path`).
    ///
    /// # Errors
    ///
    /// Propagates serialization and I/O errors. If the metadata step fails,
    /// the response file written before it is left in place.
    pub fn set(
        &self,
        cache_key: &str,
        response: &serde_json::Value,
        endpoint: &str,
        file_hash: Option<&str>,
        file_path: Option<&str>,
    ) -> Result<()> {
        let responses_dir = self.base_dir.join("responses");
        let response_path = responses_dir.join(format!("{}.json", cache_key));
        let meta_path = responses_dir.join(format!("{}.meta.json", cache_key));

        let response_json = serde_json::to_string_pretty(response)?;
        fs::write(&response_path, response_json)?;

        // The timestamp is taken after the response body has been written.
        let metadata = CacheMetadata {
            created_at: Utc::now(),
            endpoint: endpoint.to_owned(),
            params_hash: cache_key.to_owned(),
            file_hash: file_hash.map(str::to_owned),
            file_path: file_path.map(str::to_owned),
        };

        let meta_json = serde_json::to_string_pretty(&metadata)?;
        fs::write(&meta_path, meta_json)?;

        Ok(())
    }

    /// Writes `data` to `files/<file_hash>.bin` and returns the path written.
    ///
    /// An existing file with the same name is overwritten.
    ///
    /// # Errors
    ///
    /// Propagates any I/O error from writing the file.
    // NOTE(review): `dead_code` is allowed here, presumably because nothing
    // calls this method yet — confirm before removing the attribute.
    #[allow(dead_code)]
    pub fn save_binary(&self, file_hash: &str, data: &[u8]) -> Result<PathBuf> {
        let mut target = self.base_dir.join("files");
        target.push(format!("{}.bin", file_hash));

        fs::write(&target, data)?;

        Ok(target)
    }

    /// Removes cached entries and reports how many were deleted.
    ///
    /// * `older_than_days == None` — every `.json` file in `responses`
    ///   (responses and their `.meta.json` sidecars) and every entry in
    ///   `files` is removed.
    /// * `older_than_days == Some(n)` — only entries whose metadata
    ///   `created_at` is earlier than `n` days before now are removed, together
    ///   with their sidecar. Entries whose metadata is missing, unreadable, or
    ///   unparsable are kept, and `files` is left untouched.
    ///
    /// If `n` is so large that the cutoff cannot be represented as a
    /// timestamp, nothing can be older than it, so nothing is removed and
    /// empty stats are returned (instead of panicking on overflow).
    ///
    /// `responses_cleared` counts response files only; sidecars are removed
    /// without being counted.
    ///
    /// # Errors
    ///
    /// Propagates I/O errors from listing the cache directories or removing
    /// files.
    pub fn clear(&self, older_than_days: Option<u64>) -> Result<ClearStats> {
        // Comfortably below the point where `chrono::Duration::days` would
        // overflow, yet far beyond any representable timestamp distance.
        const MAX_DAYS: u64 = 1_000_000_000;

        let mut stats = ClearStats::default();

        let responses_dir = self.base_dir.join("responses");
        let files_dir = self.base_dir.join("files");

        let cutoff = match older_than_days {
            None => None,
            Some(days) => {
                let cutoff = if days > MAX_DAYS {
                    None
                } else {
                    // Lossless cast: `days` is bounded by `MAX_DAYS` above.
                    Utc::now().checked_sub_signed(chrono::Duration::days(days as i64))
                };
                match cutoff {
                    Some(cutoff) => Some(cutoff),
                    // The cutoff lies before the earliest representable
                    // timestamp, so no entry can be older than it.
                    None => return Ok(stats),
                }
            }
        };

        if responses_dir.exists() {
            for entry in fs::read_dir(&responses_dir)? {
                let path = entry?.path();

                if !path.extension().map_or(false, |ext| ext == "json") {
                    continue;
                }

                let stem = match path.file_stem() {
                    Some(stem) => stem.to_string_lossy(),
                    None => continue,
                };
                // `<key>.meta.json` has stem `<key>.meta`; strip exactly one
                // `.meta` suffix so keys that themselves end in `.meta` are
                // not mangled.
                let (cache_key, is_meta) = match stem.strip_suffix(".meta") {
                    Some(key) => (key, true),
                    None => (&*stem, false),
                };

                if let Some(cutoff) = cutoff {
                    // Only sidecars carry a timestamp, so age-based clearing
                    // is driven by them alone.
                    if !is_meta {
                        continue;
                    }
                    let expired = fs::read_to_string(&path)
                        .ok()
                        .and_then(|raw| serde_json::from_str::<CacheMetadata>(&raw).ok())
                        .map_or(false, |meta| meta.created_at < cutoff);
                    if !expired {
                        continue;
                    }
                }

                let response_file = responses_dir.join(format!("{}.json", cache_key));
                let meta_file = responses_dir.join(format!("{}.meta.json", cache_key));

                // Either file may already be gone: the pair is removed when
                // the first of its two directory entries is visited.
                if response_file.exists() {
                    fs::remove_file(&response_file)?;
                    stats.responses_cleared += 1;
                }
                if meta_file.exists() {
                    fs::remove_file(&meta_file)?;
                }
            }
        }

        // Binary files have no timestamp metadata, so they are only removed
        // on a full clear.
        if cutoff.is_none() && files_dir.exists() {
            for entry in fs::read_dir(&files_dir)? {
                let entry = entry?;
                fs::remove_file(entry.path())?;
                stats.files_cleared += 1;
            }
        }

        Ok(stats)
    }

    /// Collects entry counts and total sizes for the cache.
    ///
    /// In `responses`, only `.json` files that are not `.meta.json` sidecars
    /// are counted. In `files`, every directory entry is counted. Sizes are
    /// the byte lengths reported by each entry's filesystem metadata. A
    /// missing subdirectory contributes zero.
    ///
    /// # Errors
    ///
    /// Propagates I/O errors from listing a directory or reading an entry's
    /// metadata.
    pub fn stats(&self) -> Result<CacheStats> {
        let responses_dir = self.base_dir.join("responses");
        let files_dir = self.base_dir.join("files");

        let mut stats = CacheStats::default();

        if responses_dir.exists() {
            for entry in fs::read_dir(&responses_dir)? {
                let entry = entry?;
                let path = entry.path();

                let is_json = path.extension().map_or(false, |ext| ext == "json");
                let is_sidecar = path.to_string_lossy().ends_with(".meta.json");
                if !is_json || is_sidecar {
                    continue;
                }

                stats.response_count += 1;
                stats.response_size += entry.metadata()?.len();
            }
        }

        if files_dir.exists() {
            for entry in fs::read_dir(&files_dir)? {
                let entry = entry?;
                stats.file_count += 1;
                stats.file_size += entry.metadata()?.len();
            }
        }

        stats.cache_dir = self.base_dir.to_string_lossy().into_owned();

        Ok(stats)
    }
}

/// Summary of what a `Cache::clear` call removed.
#[derive(Default, Serialize)]
pub struct ClearStats {
    /// Number of response files deleted from `responses` (metadata sidecars
    /// are not counted).
    pub responses_cleared: usize,
    /// Number of entries deleted from `files`.
    pub files_cleared: usize,
}

/// Snapshot of cache contents as reported by `Cache::stats`.
#[derive(Default, Serialize)]
pub struct CacheStats {
    /// Cache root directory, rendered as a (lossily converted) string.
    pub cache_dir: String,
    /// Number of response files in `responses`, excluding `.meta.json` sidecars.
    pub response_count: usize,
    /// Combined size in bytes of the counted response files.
    pub response_size: u64,
    /// Number of entries in `files`.
    pub file_count: usize,
    /// Combined size in bytes of the entries in `files`.
    pub file_size: u64,
}