Skip to main content

alimentar/serve/
content.rs

1//! Core content types for WASM serving
2//!
3//! Defines the `ServeableContent` trait and supporting types for content
4//! that can be served via browser-based WASM applications.
5
6use std::{collections::HashMap, sync::Arc};
7
8use arrow::record_batch::RecordBatch;
9use serde::{Deserialize, Serialize};
10
11use crate::{error::Result, serve::schema::ContentSchema};
12
13/// Unique identifier for content types
14///
15/// Content types follow a namespaced format: `namespace.type`
16/// Built-in types include:
17/// - `alimentar.dataset` - Arrow/Parquet datasets
18/// - `alimentar.raw` - Raw pasted/clipboard data
19/// - `assetgen.course` - Educational courses
20/// - `aprender.model` - ML models
21#[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
22pub struct ContentTypeId(String);
23
24impl ContentTypeId {
25    /// Dataset content type identifier
26    pub const DATASET: &'static str = "alimentar.dataset";
27    /// Course content type identifier (assetgen)
28    pub const COURSE: &'static str = "assetgen.course";
29    /// Model content type identifier (aprender)
30    pub const MODEL: &'static str = "aprender.model";
31    /// Registry content type identifier
32    pub const REGISTRY: &'static str = "alimentar.registry";
33    /// Raw/pasted data content type identifier
34    pub const RAW: &'static str = "alimentar.raw";
35
36    /// Create a new content type ID
37    pub fn new(id: impl Into<String>) -> Self {
38        Self(id.into())
39    }
40
41    /// Create dataset content type
42    pub fn dataset() -> Self {
43        Self(Self::DATASET.to_string())
44    }
45
46    /// Create course content type
47    pub fn course() -> Self {
48        Self(Self::COURSE.to_string())
49    }
50
51    /// Create model content type
52    pub fn model() -> Self {
53        Self(Self::MODEL.to_string())
54    }
55
56    /// Create registry content type
57    pub fn registry() -> Self {
58        Self(Self::REGISTRY.to_string())
59    }
60
61    /// Create raw data content type
62    pub fn raw() -> Self {
63        Self(Self::RAW.to_string())
64    }
65
66    /// Get the string representation
67    pub fn as_str(&self) -> &str {
68        &self.0
69    }
70
71    /// Check if this is a built-in type
72    pub fn is_builtin(&self) -> bool {
73        matches!(
74            self.0.as_str(),
75            Self::DATASET | Self::COURSE | Self::MODEL | Self::REGISTRY | Self::RAW
76        )
77    }
78}
79
80impl std::fmt::Display for ContentTypeId {
81    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
82        write!(f, "{}", self.0)
83    }
84}
85
86/// Metadata associated with serveable content
87#[derive(Debug, Clone, Serialize, Deserialize)]
88pub struct ContentMetadata {
89    /// Content type identifier
90    pub content_type: ContentTypeId,
91    /// Human-readable title
92    pub title: String,
93    /// Optional description
94    pub description: Option<String>,
95    /// Content size in bytes
96    pub size: usize,
97    /// Number of rows (for tabular data)
98    pub row_count: Option<usize>,
99    /// Content schema (if applicable)
100    pub schema: Option<ContentSchema>,
101    /// Source information (URL, file path, or "clipboard")
102    pub source: Option<String>,
103    /// Additional custom metadata
104    #[serde(default)]
105    pub custom: HashMap<String, serde_json::Value>,
106}
107
108impl ContentMetadata {
109    /// Create new metadata with required fields
110    pub fn new(content_type: ContentTypeId, title: impl Into<String>, size: usize) -> Self {
111        Self {
112            content_type,
113            title: title.into(),
114            description: None,
115            size,
116            row_count: None,
117            schema: None,
118            source: None,
119            custom: HashMap::new(),
120        }
121    }
122
123    /// Set description
124    pub fn with_description(mut self, description: impl Into<String>) -> Self {
125        self.description = Some(description.into());
126        self
127    }
128
129    /// Set row count
130    pub fn with_row_count(mut self, count: usize) -> Self {
131        self.row_count = Some(count);
132        self
133    }
134
135    /// Set schema
136    pub fn with_schema(mut self, schema: ContentSchema) -> Self {
137        self.schema = Some(schema);
138        self
139    }
140
141    /// Set source
142    pub fn with_source(mut self, source: impl Into<String>) -> Self {
143        self.source = Some(source.into());
144        self
145    }
146
147    /// Add custom metadata
148    pub fn with_custom(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
149        self.custom.insert(key.into(), value);
150        self
151    }
152}
153
154/// Validation report for content integrity checks
155#[derive(Debug, Clone, Serialize, Deserialize)]
156pub struct ValidationReport {
157    /// Whether validation passed
158    pub valid: bool,
159    /// List of errors found
160    pub errors: Vec<ValidationError>,
161    /// List of warnings
162    pub warnings: Vec<ValidationWarning>,
163}
164
165impl ValidationReport {
166    /// Create a successful validation report
167    pub fn success() -> Self {
168        Self {
169            valid: true,
170            errors: Vec::new(),
171            warnings: Vec::new(),
172        }
173    }
174
175    /// Create a failed validation report with errors
176    pub fn failure(errors: Vec<ValidationError>) -> Self {
177        Self {
178            valid: false,
179            errors,
180            warnings: Vec::new(),
181        }
182    }
183
184    /// Add a warning to the report
185    pub fn with_warning(mut self, warning: ValidationWarning) -> Self {
186        self.warnings.push(warning);
187        self
188    }
189
190    /// Add an error and mark as invalid
191    pub fn with_error(mut self, error: ValidationError) -> Self {
192        self.valid = false;
193        self.errors.push(error);
194        self
195    }
196}
197
198/// A validation error
199#[derive(Debug, Clone, Serialize, Deserialize)]
200pub struct ValidationError {
201    /// Field or path that failed validation
202    pub path: String,
203    /// Error message
204    pub message: String,
205    /// Error code for programmatic handling
206    pub code: Option<String>,
207}
208
209impl ValidationError {
210    /// Create a new validation error
211    pub fn new(path: impl Into<String>, message: impl Into<String>) -> Self {
212        Self {
213            path: path.into(),
214            message: message.into(),
215            code: None,
216        }
217    }
218
219    /// Add an error code
220    pub fn with_code(mut self, code: impl Into<String>) -> Self {
221        self.code = Some(code.into());
222        self
223    }
224}
225
226/// A validation warning (non-fatal issue)
227#[derive(Debug, Clone, Serialize, Deserialize)]
228pub struct ValidationWarning {
229    /// Field or path with warning
230    pub path: String,
231    /// Warning message
232    pub message: String,
233}
234
235impl ValidationWarning {
236    /// Create a new validation warning
237    pub fn new(path: impl Into<String>, message: impl Into<String>) -> Self {
238        Self {
239            path: path.into(),
240            message: message.into(),
241        }
242    }
243}
244
245/// Trait for any content that can be served via WASM
246///
247/// This trait provides the abstraction layer between content types
248/// (datasets, courses, models, raw data) and the serving infrastructure.
249pub trait ServeableContent: Send + Sync {
250    /// Returns the content schema for validation and UI generation
251    fn schema(&self) -> ContentSchema;
252
253    /// Validates content integrity
254    ///
255    /// # Errors
256    ///
257    /// Returns an error if validation cannot be performed.
258    fn validate(&self) -> Result<ValidationReport>;
259
260    /// Converts content to Arrow RecordBatch for efficient transfer
261    ///
262    /// # Errors
263    ///
264    /// Returns an error if the content cannot be converted to Arrow format.
265    fn to_arrow(&self) -> Result<RecordBatch>;
266
267    /// Returns content metadata for indexing and discovery
268    fn metadata(&self) -> ContentMetadata;
269
270    /// Returns content type identifier
271    fn content_type(&self) -> ContentTypeId;
272
273    /// Chunk iterator for streaming large content
274    fn chunks(&self, chunk_size: usize) -> Box<dyn Iterator<Item = Result<RecordBatch>> + Send>;
275
276    /// Get the raw bytes representation (for serialization)
277    ///
278    /// # Errors
279    ///
280    /// Returns an error if the content cannot be serialized to bytes.
281    fn to_bytes(&self) -> Result<Vec<u8>>;
282}
283
284/// A boxed serveable content for dynamic dispatch
285pub type BoxedContent = Box<dyn ServeableContent>;
286
287/// Arc-wrapped serveable content for shared ownership
288#[allow(dead_code)]
289pub type SharedContent = Arc<dyn ServeableContent>;
290
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// A caller-supplied identifier is stored verbatim.
    #[test]
    fn test_content_type_id_new() {
        let custom = ContentTypeId::new("custom.type");
        assert_eq!(custom.as_str(), "custom.type");
    }

    /// Built-in constructors are recognised; arbitrary identifiers are not.
    #[test]
    fn test_content_type_is_builtin() {
        let builtins = [
            ContentTypeId::dataset(),
            ContentTypeId::course(),
            ContentTypeId::raw(),
        ];
        for id in &builtins {
            assert!(id.is_builtin());
        }

        let custom = ContentTypeId::new("custom.type");
        assert!(!custom.is_builtin());
    }

    /// Every `with_*` builder stores the value it was given.
    #[test]
    fn test_content_metadata_builder() {
        let version = serde_json::json!("1.0");
        let meta = ContentMetadata::new(ContentTypeId::dataset(), "Test Dataset", 1024)
            .with_description("A test dataset")
            .with_row_count(100)
            .with_source("clipboard")
            .with_custom("version", version);

        assert_eq!(meta.title, "Test Dataset");
        assert_eq!(meta.description.as_deref(), Some("A test dataset"));
        assert_eq!(meta.row_count, Some(100));
        assert_eq!(meta.source.as_deref(), Some("clipboard"));
        assert!(meta.custom.contains_key("version"));
    }

    /// Warnings keep a report valid; `failure` produces an invalid one.
    #[test]
    fn test_validation_report() {
        let warning = ValidationWarning::new("field1", "Optional field missing");
        let warned = ValidationReport::success().with_warning(warning);

        assert!(warned.valid);
        assert!(warned.errors.is_empty());
        assert_eq!(warned.warnings.len(), 1);

        let error =
            ValidationError::new("field2", "Required field missing").with_code("REQUIRED_FIELD");
        let failed = ValidationReport::failure(vec![error]);

        assert!(!failed.valid);
        assert_eq!(failed.errors.len(), 1);
        assert_eq!(failed.errors[0].code.as_deref(), Some("REQUIRED_FIELD"));
    }

    /// Adding an error to a passing report flips it to invalid.
    #[test]
    fn test_validation_report_with_error() {
        let error = ValidationError::new("field", "Error");
        let report = ValidationReport::success().with_error(error);

        assert!(!report.valid);
        assert_eq!(report.errors.len(), 1);
    }

    /// The model constructor yields the built-in model identifier.
    #[test]
    fn test_content_type_id_model() {
        let id = ContentTypeId::model();
        assert_eq!(id.as_str(), "aprender.model");
        assert!(id.is_builtin());
    }

    /// The registry constructor yields the built-in registry identifier.
    #[test]
    fn test_content_type_id_registry() {
        let id = ContentTypeId::registry();
        assert_eq!(id.as_str(), "alimentar.registry");
        assert!(id.is_builtin());
    }

    /// A freshly created error has no code and keeps its path and message.
    #[test]
    fn test_validation_error_without_code() {
        let error = ValidationError::new("path", "message");
        assert!(error.code.is_none());
        assert_eq!(error.path, "path");
        assert_eq!(error.message, "message");
    }
}