// ucm_core/content.rs

//! Content types for UCM blocks.
//!
//! Each block contains typed content that can be text, tables, code,
//! math expressions, media, JSON, binary data, or a composite of other blocks.
5
6use crate::id::BlockId;
7use serde::{Deserialize, Serialize};
8
9/// The content payload of a block.
10#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
11#[serde(tag = "type", rename_all = "snake_case")]
12pub enum Content {
13    /// Plain, markdown, or rich text
14    Text(Text),
15
16    /// Tabular data with optional schema
17    Table(Table),
18
19    /// Source code with language hint
20    Code(Code),
21
22    /// Mathematical expressions
23    Math(Math),
24
25    /// Media references (images, audio, video)
26    Media(Media),
27
28    /// Structured JSON data
29    Json {
30        value: serde_json::Value,
31        #[serde(skip_serializing_if = "Option::is_none")]
32        schema: Option<JsonSchema>,
33    },
34
35    /// Raw binary data
36    Binary {
37        mime_type: String,
38        #[serde(with = "base64_serde")]
39        data: Vec<u8>,
40        #[serde(default)]
41        encoding: BinaryEncoding,
42    },
43
44    /// Composite block (contains other blocks by reference)
45    Composite {
46        layout: CompositeLayout,
47        children: Vec<BlockId>,
48    },
49}
50
51impl Content {
52    /// Get the type tag for hashing and identification
53    pub fn type_tag(&self) -> &'static str {
54        match self {
55            Content::Text(_) => "text",
56            Content::Table(_) => "table",
57            Content::Code(_) => "code",
58            Content::Math(_) => "math",
59            Content::Media(_) => "media",
60            Content::Json { .. } => "json",
61            Content::Binary { .. } => "binary",
62            Content::Composite { .. } => "composite",
63        }
64    }
65
66    /// Create a simple text content
67    pub fn text(text: impl Into<String>) -> Self {
68        Content::Text(Text {
69            text: text.into(),
70            format: TextFormat::Plain,
71        })
72    }
73
74    /// Create markdown text content
75    pub fn markdown(text: impl Into<String>) -> Self {
76        Content::Text(Text {
77            text: text.into(),
78            format: TextFormat::Markdown,
79        })
80    }
81
82    /// Create code content
83    pub fn code(language: impl Into<String>, source: impl Into<String>) -> Self {
84        Content::Code(Code {
85            language: language.into(),
86            source: source.into(),
87            highlights: Vec::new(),
88        })
89    }
90
91    /// Create JSON content
92    pub fn json(value: serde_json::Value) -> Self {
93        Content::Json {
94            value,
95            schema: None,
96        }
97    }
98
99    /// Create table content from rows of strings
100    pub fn table(rows: Vec<Vec<String>>) -> Self {
101        let columns = if rows.is_empty() {
102            Vec::new()
103        } else {
104            rows[0]
105                .iter()
106                .enumerate()
107                .map(|(i, _)| Column {
108                    name: format!("col{}", i),
109                    data_type: Some(DataType::Text),
110                    nullable: true,
111                })
112                .collect()
113        };
114
115        let table_rows = rows
116            .into_iter()
117            .map(|r| Row {
118                cells: r.into_iter().map(Cell::Text).collect(),
119            })
120            .collect();
121
122        Content::Table(Table {
123            columns,
124            rows: table_rows,
125            schema: None,
126        })
127    }
128
129    /// Check if the content is empty
130    pub fn is_empty(&self) -> bool {
131        match self {
132            Content::Text(t) => t.text.is_empty(),
133            Content::Table(t) => t.rows.is_empty(),
134            Content::Code(c) => c.source.is_empty(),
135            Content::Math(m) => m.expression.is_empty(),
136            Content::Media(_) => false,
137            Content::Json { value, .. } => value.is_null(),
138            Content::Binary { data, .. } => data.is_empty(),
139            Content::Composite { children, .. } => children.is_empty(),
140        }
141    }
142
143    /// Get approximate size in bytes
144    pub fn size_bytes(&self) -> usize {
145        match self {
146            Content::Text(t) => t.text.len(),
147            Content::Table(t) => {
148                t.columns.iter().map(|c| c.name.len()).sum::<usize>()
149                    + t.rows
150                        .iter()
151                        .flat_map(|r| &r.cells)
152                        .map(|c| c.size_bytes())
153                        .sum::<usize>()
154            }
155            Content::Code(c) => c.source.len(),
156            Content::Math(m) => m.expression.len(),
157            Content::Media(m) => match &m.source {
158                MediaSource::Base64(s) => s.len(),
159                MediaSource::Url(s) => s.len(),
160                _ => 32,
161            },
162            Content::Json { value, .. } => value.to_string().len(),
163            Content::Binary { data, .. } => data.len(),
164            Content::Composite { children, .. } => children.len() * 12,
165        }
166    }
167}
168
169/// Text content
170#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
171pub struct Text {
172    pub text: String,
173    #[serde(default)]
174    pub format: TextFormat,
175}
176
177/// Text format
178#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
179#[serde(rename_all = "lowercase")]
180pub enum TextFormat {
181    #[default]
182    Plain,
183    Markdown,
184    Rich,
185}
186
187/// Table content
188#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
189pub struct Table {
190    pub columns: Vec<Column>,
191    pub rows: Vec<Row>,
192    #[serde(skip_serializing_if = "Option::is_none")]
193    pub schema: Option<TableSchema>,
194}
195
196impl Table {
197    pub fn new(columns: Vec<Column>) -> Self {
198        Self {
199            columns,
200            rows: Vec::new(),
201            schema: None,
202        }
203    }
204
205    pub fn with_rows(mut self, rows: Vec<Row>) -> Self {
206        self.rows = rows;
207        self
208    }
209
210    pub fn add_row(&mut self, row: Row) {
211        self.rows.push(row);
212    }
213
214    pub fn column_count(&self) -> usize {
215        self.columns.len()
216    }
217
218    pub fn row_count(&self) -> usize {
219        self.rows.len()
220    }
221}
222
223/// Table column definition
224#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
225pub struct Column {
226    pub name: String,
227    #[serde(skip_serializing_if = "Option::is_none")]
228    pub data_type: Option<DataType>,
229    #[serde(default = "default_true")]
230    pub nullable: bool,
231}
232
/// Serde default provider: supplies `true` for a boolean field that is
/// missing from the input (used by `Column::nullable`).
fn default_true() -> bool {
    true
}
236
237impl Column {
238    pub fn new(name: impl Into<String>) -> Self {
239        Self {
240            name: name.into(),
241            data_type: None,
242            nullable: true,
243        }
244    }
245
246    pub fn with_type(mut self, data_type: DataType) -> Self {
247        self.data_type = Some(data_type);
248        self
249    }
250
251    pub fn not_null(mut self) -> Self {
252        self.nullable = false;
253        self
254    }
255}
256
257/// Data type for table columns
258#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
259#[serde(rename_all = "lowercase")]
260pub enum DataType {
261    Text,
262    Integer,
263    Float,
264    Boolean,
265    Date,
266    DateTime,
267    Json,
268}
269
270/// Table row
271#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
272pub struct Row {
273    pub cells: Vec<Cell>,
274}
275
276impl Row {
277    pub fn new(cells: Vec<Cell>) -> Self {
278        Self { cells }
279    }
280
281    pub fn from_strings(values: Vec<&str>) -> Self {
282        Self {
283            cells: values
284                .into_iter()
285                .map(|s| Cell::Text(s.to_string()))
286                .collect(),
287        }
288    }
289}
290
291/// Table cell value
292#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
293#[serde(untagged)]
294pub enum Cell {
295    Null,
296    Text(String),
297    Number(f64),
298    Boolean(bool),
299    Date(String),     // ISO 8601 date
300    DateTime(String), // ISO 8601 datetime
301    Json(serde_json::Value),
302}
303
304impl Cell {
305    pub fn size_bytes(&self) -> usize {
306        match self {
307            Cell::Null => 0,
308            Cell::Text(s) => s.len(),
309            Cell::Number(_) => 8,
310            Cell::Boolean(_) => 1,
311            Cell::Date(s) => s.len(),
312            Cell::DateTime(s) => s.len(),
313            Cell::Json(v) => v.to_string().len(),
314        }
315    }
316
317    pub fn is_null(&self) -> bool {
318        matches!(self, Cell::Null)
319    }
320}
321
322impl From<&str> for Cell {
323    fn from(s: &str) -> Self {
324        Cell::Text(s.to_string())
325    }
326}
327
328impl From<String> for Cell {
329    fn from(s: String) -> Self {
330        Cell::Text(s)
331    }
332}
333
334impl From<i64> for Cell {
335    fn from(n: i64) -> Self {
336        Cell::Number(n as f64)
337    }
338}
339
340impl From<f64> for Cell {
341    fn from(n: f64) -> Self {
342        Cell::Number(n)
343    }
344}
345
346impl From<bool> for Cell {
347    fn from(b: bool) -> Self {
348        Cell::Boolean(b)
349    }
350}
351
352/// Table schema
353#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
354pub struct TableSchema {
355    #[serde(skip_serializing_if = "Option::is_none")]
356    pub primary_key: Option<Vec<String>>,
357    #[serde(default, skip_serializing_if = "Vec::is_empty")]
358    pub constraints: Vec<Constraint>,
359    #[serde(default, skip_serializing_if = "Vec::is_empty")]
360    pub indices: Vec<IndexDef>,
361}
362
363/// Table constraint
364#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
365#[serde(tag = "type", rename_all = "snake_case")]
366pub enum Constraint {
367    Unique {
368        columns: Vec<String>,
369    },
370    Check {
371        expression: String,
372    },
373    ForeignKey {
374        columns: Vec<String>,
375        references: String,
376    },
377}
378
379/// Index definition
380#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
381pub struct IndexDef {
382    pub name: String,
383    pub columns: Vec<String>,
384    #[serde(default)]
385    pub unique: bool,
386}
387
388/// Code content
389#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
390pub struct Code {
391    pub language: String,
392    pub source: String,
393    #[serde(default, skip_serializing_if = "Vec::is_empty")]
394    pub highlights: Vec<LineRange>,
395}
396
397impl Code {
398    pub fn new(language: impl Into<String>, source: impl Into<String>) -> Self {
399        Self {
400            language: language.into(),
401            source: source.into(),
402            highlights: Vec::new(),
403        }
404    }
405
406    pub fn line_count(&self) -> usize {
407        self.source.lines().count()
408    }
409
410    pub fn get_lines(&self, start: usize, end: usize) -> Option<String> {
411        let lines: Vec<&str> = self.source.lines().collect();
412        if start > 0 && end <= lines.len() && start <= end {
413            Some(lines[start - 1..end].join("\n"))
414        } else {
415            None
416        }
417    }
418}
419
420/// Line range for code highlights
421#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
422pub struct LineRange {
423    pub start: usize,
424    pub end: usize,
425}
426
427impl LineRange {
428    pub fn new(start: usize, end: usize) -> Self {
429        Self { start, end }
430    }
431
432    pub fn single(line: usize) -> Self {
433        Self {
434            start: line,
435            end: line,
436        }
437    }
438}
439
440/// Math content
441#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
442pub struct Math {
443    pub format: MathFormat,
444    pub expression: String,
445    #[serde(default)]
446    pub display_mode: bool,
447}
448
449impl Math {
450    pub fn latex(expression: impl Into<String>) -> Self {
451        Self {
452            format: MathFormat::LaTeX,
453            expression: expression.into(),
454            display_mode: false,
455        }
456    }
457
458    pub fn display(mut self) -> Self {
459        self.display_mode = true;
460        self
461    }
462}
463
464/// Math format
465#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
466pub enum MathFormat {
467    #[default]
468    LaTeX,
469    MathML,
470    AsciiMath,
471}
472
473/// Media content
474#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
475pub struct Media {
476    pub media_type: MediaType,
477    pub source: MediaSource,
478    #[serde(skip_serializing_if = "Option::is_none")]
479    pub alt_text: Option<String>,
480    #[serde(skip_serializing_if = "Option::is_none")]
481    pub dimensions: Option<Dimensions>,
482    #[serde(skip_serializing_if = "Option::is_none")]
483    pub content_hash: Option<[u8; 32]>,
484}
485
486impl Media {
487    pub fn image(source: MediaSource) -> Self {
488        Self {
489            media_type: MediaType::Image,
490            source,
491            alt_text: None,
492            dimensions: None,
493            content_hash: None,
494        }
495    }
496
497    pub fn with_alt(mut self, alt: impl Into<String>) -> Self {
498        self.alt_text = Some(alt.into());
499        self
500    }
501
502    pub fn with_dimensions(mut self, width: u32, height: u32) -> Self {
503        self.dimensions = Some(Dimensions { width, height });
504        self
505    }
506}
507
508/// Media type
509#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
510#[serde(rename_all = "lowercase")]
511pub enum MediaType {
512    Image,
513    Audio,
514    Video,
515    Document,
516}
517
518/// Media source
519#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
520#[serde(tag = "type", content = "value", rename_all = "lowercase")]
521pub enum MediaSource {
522    Url(String),
523    Base64(String),
524    Reference(BlockId),
525    External(ExternalRef),
526}
527
528impl MediaSource {
529    pub fn url(url: impl Into<String>) -> Self {
530        MediaSource::Url(url.into())
531    }
532}
533
534/// External storage reference
535#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
536pub struct ExternalRef {
537    pub provider: String,
538    pub bucket: String,
539    pub key: String,
540    #[serde(skip_serializing_if = "Option::is_none")]
541    pub region: Option<String>,
542}
543
544/// Dimensions for media
545#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
546pub struct Dimensions {
547    pub width: u32,
548    pub height: u32,
549}
550
551/// JSON schema reference
552#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
553#[serde(untagged)]
554pub enum JsonSchema {
555    Uri(String),
556    Inline(serde_json::Value),
557}
558
559/// Binary encoding
560#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
561#[serde(rename_all = "lowercase")]
562pub enum BinaryEncoding {
563    #[default]
564    Raw,
565    Base64,
566    Hex,
567}
568
569/// Composite layout
570#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
571#[serde(rename_all = "lowercase")]
572pub enum CompositeLayout {
573    #[default]
574    Vertical,
575    Horizontal,
576    Grid(usize),
577    Tabs,
578}
579
580// Base64 serde helper
581mod base64_serde {
582    use base64::{engine::general_purpose::STANDARD, Engine as _};
583    use serde::{self, Deserialize, Deserializer, Serializer};
584
585    pub fn serialize<S>(bytes: &[u8], serializer: S) -> Result<S::Ok, S::Error>
586    where
587        S: Serializer,
588    {
589        serializer.serialize_str(&STANDARD.encode(bytes))
590    }
591
592    pub fn deserialize<'de, D>(deserializer: D) -> Result<Vec<u8>, D::Error>
593    where
594        D: Deserializer<'de>,
595    {
596        let s = String::deserialize(deserializer)?;
597        STANDARD.decode(&s).map_err(serde::de::Error::custom)
598    }
599}
600
#[cfg(test)]
mod tests {
    use super::*;

    /// Each convenience constructor reports the tag of the variant it builds.
    #[test]
    fn test_content_type_tag() {
        let text = Content::text("hello");
        let code = Content::code("rust", "fn main() {}");
        let json = Content::json(serde_json::json!({}));

        assert_eq!(text.type_tag(), "text");
        assert_eq!(code.type_tag(), "code");
        assert_eq!(json.type_tag(), "json");
    }

    /// `Content::text` stores the string verbatim and marks it as plain.
    #[test]
    fn test_text_content() {
        if let Content::Text(t) = Content::text("Hello, world!") {
            assert_eq!(t.text, "Hello, world!");
            assert_eq!(t.format, TextFormat::Plain);
        } else {
            panic!("Expected Text content");
        }
    }

    /// Column and row counts reflect what was put into the table.
    #[test]
    fn test_table_content() {
        let columns = vec![
            Column::new("name").with_type(DataType::Text),
            Column::new("age").with_type(DataType::Integer),
        ];
        let mut table = Table::new(columns);

        let row = Row::new(vec![Cell::from("Alice"), Cell::from(30i64)]);
        table.add_row(row);

        assert_eq!(table.column_count(), 2);
        assert_eq!(table.row_count(), 1);
    }

    /// Line counting and 1-based inclusive line extraction.
    #[test]
    fn test_code_lines() {
        let code = Code::new("rust", "line1\nline2\nline3\nline4");
        let expected = Some(String::from("line2\nline3"));

        assert_eq!(code.line_count(), 4);
        assert_eq!(code.get_lines(2, 3), expected);
    }

    /// Text content survives a JSON serialize → deserialize round trip.
    #[test]
    fn test_content_serialization() {
        let original = Content::text("Hello");
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: Content = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }
}