// axonml-server 0.6.2

//! Dataset Database Operations — CRUD for ML Dataset Metadata
//!
//! Provides document-store-backed persistence for ML dataset records via
//! `DatasetRepository`. Datasets are stored in the `axonml_datasets`
//! Aegis-DB document collection.
//!
//! Key types:
//! - `Dataset` — full dataset record with id, owner, file path, size,
//!   sample/feature/class counts, and timestamps.
//! - `NewDataset` — creation payload (no id or timestamps yet).
//! - `DatasetType` — enum of supported modalities: Image, Tabular, Text,
//!   Audio, Custom (defaults to Tabular).
//! - `DatasetRepository` — borrows a `Database` reference and exposes
//!   `create()`, `find_by_id()`, `find_by_user()`, and `delete()`.
//!
//! # File
//! `crates/axonml-server/src/db/datasets.rs`
//!
//! # Author
//! Andrew Jewell Sr. — AutomataNexus LLC
//! ORCID: 0009-0005-2158-7060
//!
//! # Updated
//! April 16, 2026 11:15 PM EST
//!
//! # Disclaimer
//! Use at own risk. This software is provided "as is", without warranty of any
//! kind, express or implied. The author and AutomataNexus shall not be held
//! liable for any damages arising from the use of this software.

// =============================================================================
// Imports
// =============================================================================

use super::{Database, DbError, DocumentQuery};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;

// =============================================================================
// Constants
// =============================================================================

/// Name of the document collection in which dataset records are stored.
///
/// Every `DatasetRepository` operation in this module passes this value as the
/// collection argument of the underlying database call.
const COLLECTION: &str = "axonml_datasets";

// =============================================================================
// Types
// =============================================================================

/// Modality of a stored dataset.
///
/// Variants are (de)serialized by serde under their lowercase names
/// (`"image"`, `"tabular"`, `"text"`, `"audio"`, `"custom"`). `Tabular` is the
/// value produced by `Default`.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DatasetType {
    /// Image dataset.
    Image,
    /// Tabular dataset; this is the default variant.
    #[default]
    Tabular,
    /// Text dataset.
    Text,
    /// Audio dataset.
    Audio,
    /// Custom dataset type.
    Custom,
}

/// A persisted dataset record, as written to and read back from the dataset
/// document collection.
///
/// `DatasetRepository::create` builds this from a [`NewDataset`] by adding a
/// generated `id` and the two timestamps; every other field is copied through
/// unchanged.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Dataset {
    /// Unique identifier. `create` generates it as a UUID v4 string and also
    /// uses it as the document id on insert.
    pub id: String,
    /// Identifier of the owning user; `find_by_user` filters on this field.
    pub user_id: String,
    /// Dataset name.
    pub name: String,
    /// Optional free-form description.
    pub description: Option<String>,
    /// Modality of the dataset.
    pub dataset_type: DatasetType,
    /// Path of the dataset file.
    /// NOTE(review): whether this is absolute or relative to a storage root is
    /// not visible in this module — confirm with the code that writes it.
    pub file_path: String,
    /// Size of the dataset file (presumably in bytes — TODO confirm).
    pub file_size: u64,
    /// Number of samples, when known.
    pub num_samples: Option<u64>,
    /// Number of features, when known.
    pub num_features: Option<u64>,
    /// Number of classes, when known.
    pub num_classes: Option<u64>,
    /// Creation time in UTC; set by `create`.
    pub created_at: DateTime<Utc>,
    /// Last-update time in UTC; `create` initializes it to the same instant as
    /// `created_at`.
    pub updated_at: DateTime<Utc>,
}

/// Creation payload for a dataset: every field of [`Dataset`] except the `id`
/// and the `created_at` / `updated_at` timestamps, which
/// `DatasetRepository::create` fills in.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NewDataset {
    /// Identifier of the user who will own the dataset.
    pub user_id: String,
    /// Dataset name.
    pub name: String,
    /// Optional free-form description.
    pub description: Option<String>,
    /// Modality of the dataset.
    pub dataset_type: DatasetType,
    /// Path of the dataset file.
    /// NOTE(review): absolute vs. storage-root-relative is not visible in this
    /// module — confirm with the caller.
    pub file_path: String,
    /// Size of the dataset file (presumably in bytes — TODO confirm).
    pub file_size: u64,
    /// Number of samples, when known.
    pub num_samples: Option<u64>,
    /// Number of features, when known.
    pub num_features: Option<u64>,
    /// Number of classes, when known.
    pub num_classes: Option<u64>,
}

// =============================================================================
// Repository
// =============================================================================

/// Repository exposing dataset persistence operations on top of a borrowed
/// [`Database`].
///
/// The repository does not own the database; it holds a shared reference for
/// the lifetime `'a`.
pub struct DatasetRepository<'a> {
    /// Database through which every document operation is issued.
    db: &'a Database,
}

impl<'a> DatasetRepository<'a> {
    /// Create a new dataset repository
    pub fn new(db: &'a Database) -> Self {
        Self { db }
    }

    // -------------------------------------------------------------------------
    // CRUD Operations
    // -------------------------------------------------------------------------

    /// Create a new dataset
    pub async fn create(&self, new_dataset: NewDataset) -> Result<Dataset, DbError> {
        let now = Utc::now();
        let dataset = Dataset {
            id: Uuid::new_v4().to_string(),
            user_id: new_dataset.user_id,
            name: new_dataset.name,
            description: new_dataset.description,
            dataset_type: new_dataset.dataset_type,
            file_path: new_dataset.file_path,
            file_size: new_dataset.file_size,
            num_samples: new_dataset.num_samples,
            num_features: new_dataset.num_features,
            num_classes: new_dataset.num_classes,
            created_at: now,
            updated_at: now,
        };

        let dataset_json = serde_json::to_value(&dataset)?;
        self.db
            .doc_insert(COLLECTION, Some(&dataset.id), dataset_json)
            .await?;

        Ok(dataset)
    }

    /// Find dataset by ID
    pub async fn find_by_id(&self, id: &str) -> Result<Option<Dataset>, DbError> {
        let doc = self.db.doc_get(COLLECTION, id).await?;

        match doc {
            Some(data) => {
                let dataset: Dataset = serde_json::from_value(data)?;
                Ok(Some(dataset))
            }
            None => Ok(None),
        }
    }

    // -------------------------------------------------------------------------
    // Queries
    // -------------------------------------------------------------------------

    /// Find all datasets for a user
    pub async fn find_by_user(&self, user_id: &str) -> Result<Vec<Dataset>, DbError> {
        let query = DocumentQuery {
            filter: Some(serde_json::json!({ "user_id": user_id })),
            ..Default::default()
        };

        let docs = self.db.doc_query(COLLECTION, query).await?;

        let datasets: Vec<Dataset> = docs
            .into_iter()
            .filter_map(|d| serde_json::from_value(d).ok())
            .collect();

        Ok(datasets)
    }

    /// Delete a dataset
    pub async fn delete(&self, id: &str) -> Result<(), DbError> {
        self.db.doc_delete(COLLECTION, id).await
    }
}