Skip to main content

sif_parser/
types.rs

1// SIF Core v1 — Data types derived from the specification.
2//
3// Reference: SIF-SPEC.md §3 (Data Model), §7 (Type System),
4//            §8 (Schema), §9 (Records), §16-22 (Semantics, Sections,
5//            Directives, Blocks, References, Inline Mode, Templates),
6//            Appendix A (Collected ABNF).
7
8use std::collections::HashMap;
9
10// ── Type System (§7) ────────────────────────────────────────────────
11
/// A SIF type expression, as it appears in a `#schema` directive.
///
/// Suffixes are read left-to-right and each one wraps what precedes it
/// (§7.1), so `str[]?` is represented as `Nullable(Array(Str))`.
#[derive(Clone, Debug, PartialEq)]
pub enum Type {
    // Scalar types (§7.1)
    /// Rendered as `bool`.
    Bool,
    /// Rendered as `int`.
    Int,
    /// Rendered as `uint`.
    Uint,
    /// Rendered as `float`.
    Float,
    /// Rendered as `str`.
    Str,
    /// Rendered as `date`.
    Date,
    /// Rendered as `datetime`.
    DateTime,
    /// Rendered as `duration`.
    Duration,
    /// Rendered as `bytes`.
    Bytes,
    /// Rendered as `enum(a,b,...)` from the listed strings.
    Enum(Vec<String>),
    /// Rendered as `null`.
    Null,
    /// Rendered as `any`.
    Any,

    // Compound types (§7.2)
    /// Rendered as `map`.
    Map,
    /// Rendered as the element type followed by `[]`.
    Array(Box<Type>),
    /// Rendered as the wrapped type followed by `?`.
    Nullable(Box<Type>),
}

impl Type {
    /// Reports whether the null literal `_` is acceptable for this type.
    ///
    /// Only the outermost constructor is inspected: `Nullable(..)`, `Any`
    /// and `Null` answer `true`, everything else `false`.
    pub fn is_nullable(&self) -> bool {
        matches!(self, Type::Null | Type::Any | Type::Nullable(_))
    }
}

impl std::fmt::Display for Type {
    /// Writes the type in schema notation (`str[]?`, `enum(a,b)`, ...).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Parameterised types format their payload and return early; every
        // remaining variant is a fixed keyword written in one place below.
        let keyword = match self {
            Type::Array(element) => return write!(f, "{}[]", element),
            Type::Nullable(base) => return write!(f, "{}?", base),
            Type::Enum(variants) => return write!(f, "enum({})", variants.join(",")),
            Type::Bool => "bool",
            Type::Int => "int",
            Type::Uint => "uint",
            Type::Float => "float",
            Type::Str => "str",
            Type::Date => "date",
            Type::DateTime => "datetime",
            Type::Duration => "duration",
            Type::Bytes => "bytes",
            Type::Null => "null",
            Type::Any => "any",
            Type::Map => "map",
        };
        f.write_str(keyword)
    }
}
68
69// ── Values (§9–§15) ─────────────────────────────────────────────────
70
/// A parsed SIF value.
///
/// Temporal variants (`Date`, `DateTime`, `Duration`) carry their payload
/// as a `String`.
#[derive(Clone, Debug, PartialEq)]
pub enum Value {
    Null,
    Bool(bool),
    Int(i64),
    Uint(u64),
    Float(f64),
    Str(String),
    Date(String),
    DateTime(String),
    Duration(String),
    Bytes(Vec<u8>),
    Enum(String),
    Array(Vec<Value>),
    /// Key/value entries kept in a `Vec`, so order and duplicates survive.
    Map(Vec<(String, Value)>),
}

impl Value {
    /// `true` only for [`Value::Null`].
    pub fn is_null(&self) -> bool {
        matches!(self, Value::Null)
    }

    /// Borrows the text of a [`Value::Str`]; every other variant, including
    /// the other string-backed ones, yields `None`.
    pub fn as_str(&self) -> Option<&str> {
        if let Value::Str(text) = self {
            Some(text.as_str())
        } else {
            None
        }
    }

    /// Copies out the number of a [`Value::Int`]; `None` otherwise.
    pub fn as_int(&self) -> Option<i64> {
        if let Value::Int(number) = *self {
            Some(number)
        } else {
            None
        }
    }

    /// Copies out the number of a [`Value::Uint`]; `None` otherwise.
    pub fn as_uint(&self) -> Option<u64> {
        if let Value::Uint(number) = *self {
            Some(number)
        } else {
            None
        }
    }

    /// Copies out the number of a [`Value::Float`]; `None` otherwise.
    pub fn as_float(&self) -> Option<f64> {
        if let Value::Float(number) = *self {
            Some(number)
        } else {
            None
        }
    }

    /// Copies out the flag of a [`Value::Bool`]; `None` otherwise.
    pub fn as_bool(&self) -> Option<bool> {
        if let Value::Bool(flag) = *self {
            Some(flag)
        } else {
            None
        }
    }
}
129
130// ── Schema (§8) ─────────────────────────────────────────────────────
131
/// One modifier attached to a field definition (§8.6).
///
/// A bare flag such as `hidden` is stored with `value == None`; a
/// `key=value` pair such as `unit=usd` stores `Some("usd")`.
#[derive(Clone, Debug, PartialEq)]
pub struct Modifier {
    /// Modifier key (the flag name, or the left side of `key=value`).
    pub name: String,
    /// Right side of `key=value`; `None` for a boolean flag.
    pub value: Option<String>,
}
141
142/// A single field definition within a `#schema` directive (§8).
143#[derive(Debug, Clone, PartialEq)]
144pub struct FieldDef {
145    pub name: String,
146    pub field_type: Type,
147    pub semantic: Option<String>,
148    pub deprecated: bool,
149    pub modifiers: Vec<Modifier>,
150}
151
152/// A parsed `#schema` directive (§8).
153#[derive(Debug, Clone, PartialEq)]
154pub struct Schema {
155    pub fields: Vec<FieldDef>,
156}
157
158impl Schema {
159    pub fn field_count(&self) -> usize {
160        self.fields.len()
161    }
162
163    pub fn field_by_name(&self, name: &str) -> Option<&FieldDef> {
164        self.fields.iter().find(|f| f.name == name)
165    }
166
167    pub fn field_index(&self, name: &str) -> Option<usize> {
168        self.fields.iter().position(|f| f.name == name)
169    }
170
171    /// Returns the field with the `:id` semantic, if any.
172    pub fn id_field(&self) -> Option<&FieldDef> {
173        self.fields
174            .iter()
175            .find(|f| f.semantic.as_deref() == Some("id"))
176    }
177}
178
179// ── CDC (SIF Streaming §17) ─────────────────────────────────────────
180
/// The Change Data Capture operation selected by a record's prefix.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CdcOp {
    /// No prefix: an insert (the default in CDC streams) or a plain record.
    Insert,
    /// Prefix `Δ` (U+0394): full-replace update.
    Update,
    /// Prefix `∅` (U+2205): tombstone delete.
    Delete,
}
191
192// ── Records (§9) ────────────────────────────────────────────────────
193
194/// A parsed data record.
195#[derive(Debug, Clone, PartialEq)]
196pub struct Record {
197    pub values: Vec<Value>,
198    pub cdc_op: CdcOp,
199}
200
201impl Record {
202    /// Access a field value by index.
203    pub fn get(&self, index: usize) -> Option<&Value> {
204        self.values.get(index)
205    }
206
207    /// Access a field value by name using the given schema.
208    pub fn get_by_name<'a>(&'a self, name: &str, schema: &Schema) -> Option<&'a Value> {
209        schema.field_index(name).and_then(|i| self.values.get(i))
210    }
211}
212
213// ── Sort direction (§18.4) ──────────────────────────────────────────
214
/// Direction carried by a sort directive (§18.4).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SortDirection {
    Asc,
    Desc,
}
220
221// ── Field references (§20) ──────────────────────────────────────────
222
/// A reference to a field, possibly one that lives in another section.
///
/// A bare `field` points into the current section; `§section.field` points
/// into the section with that name.
#[derive(Clone, Debug, PartialEq)]
pub struct FieldRef {
    /// Name of the target section; `None` means the current section.
    pub section: Option<String>,
    /// Name of the referenced field.
    pub field: String,
}
232
233// ── Directives (§18) ────────────────────────────────────────────────
234
235/// A parsed meta directive.
236#[derive(Debug, Clone, PartialEq)]
237pub enum Directive {
238    Context(String),
239    Source(String),
240    License(String),
241    Sort {
242        field: String,
243        direction: SortDirection,
244    },
245    Filter(String),
246    Limit(u64),
247    Truncated(Vec<(String, String)>),
248    Relation {
249        from: FieldRef,
250        to: FieldRef,
251    },
252    Recall,
253    Error(String),
254    /// A directive not recognized by this parser (§18.8).
255    Unknown {
256        name: String,
257        content: String,
258    },
259}
260
261// ── Blocks (§19) ────────────────────────────────────────────────────
262
/// The kind of a block region (§19).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BlockType {
    Code,
    Text,
    Diff,
    Raw,
    Template,
}

impl BlockType {
    /// Lower-case keyword for this block kind (`"code"`, `"text"`, ...).
    pub fn as_str(&self) -> &'static str {
        use BlockType::{Code, Diff, Raw, Template, Text};

        match *self {
            Code => "code",
            Text => "text",
            Diff => "diff",
            Raw => "raw",
            Template => "template",
        }
    }
}
283
284/// A parsed block region (§19).
285#[derive(Debug, Clone, PartialEq)]
286pub struct Block {
287    pub block_type: BlockType,
288    pub attributes: Vec<(String, String)>,
289    pub content: String,
290}
291
292// ── Templates (§22) ─────────────────────────────────────────────────
293
294/// A named template for record rendering (§22).
295#[derive(Debug, Clone, PartialEq)]
296pub struct Template {
297    pub name: String,
298    pub body: String,
299}
300
301impl Template {
302    /// Render this template against a record using the given schema.
303    ///
304    /// Substitutes `@{field_name}` with the corresponding value.
305    pub fn render(&self, record: &Record, schema: &Schema) -> String {
306        let mut result = self.body.clone();
307        for field in &schema.fields {
308            let placeholder = format!("@{{{}}}", field.name);
309            if let Some(value) = record.get_by_name(&field.name, schema) {
310                let text = format_value(value);
311                result = result.replace(&placeholder, &text);
312            }
313        }
314        result
315    }
316}
317
318fn format_value(value: &Value) -> String {
319    match value {
320        Value::Null => "_".to_string(),
321        Value::Bool(b) => if *b { "T" } else { "F" }.to_string(),
322        Value::Int(n) => n.to_string(),
323        Value::Uint(n) => n.to_string(),
324        Value::Float(n) => {
325            let s = n.to_string();
326            if s.contains('.') { s } else { format!("{}.0", s) }
327        }
328        Value::Str(s) => s.clone(),
329        Value::Date(s) | Value::DateTime(s) | Value::Duration(s) => s.clone(),
330        Value::Bytes(b) => base64_encode(b),
331        Value::Enum(s) => s.clone(),
332        Value::Array(arr) => {
333            let elems: Vec<String> = arr.iter().map(format_value).collect();
334            format!("[{}]", elems.join(","))
335        }
336        Value::Map(entries) => {
337            let pairs: Vec<String> = entries
338                .iter()
339                .map(|(k, v)| format!("{}:{}", k, format_value(v)))
340                .collect();
341            format!("{{{}}}", pairs.join(","))
342        }
343    }
344}
345
/// Encode `data` as base64 using the `A-Z a-z 0-9 + /` alphabet with `=`
/// padding.
///
/// Input is consumed three bytes at a time; each group yields four output
/// characters, with `=` filling the positions a short final group cannot
/// supply. Empty input gives an empty string.
pub(crate) fn base64_encode(data: &[u8]) -> String {
    const ALPHABET: &[u8; 64] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    const PAD: char = '=';

    // Character for the 6-bit field of `word` that starts at bit `shift`.
    let sextet = |word: u32, shift: u32| ALPHABET[((word >> shift) & 0x3f) as usize] as char;

    let mut encoded = String::with_capacity((data.len() + 2) / 3 * 4);
    for group in data.chunks(3) {
        // Pack the 1..=3 bytes into the top of a 24-bit word; absent bytes
        // stay zero.
        let word = group
            .iter()
            .enumerate()
            .fold(0u32, |acc, (i, &byte)| acc | (u32::from(byte) << (16 - 8 * i)));

        encoded.push(sextet(word, 18));
        encoded.push(sextet(word, 12));
        encoded.push(if group.len() > 1 { sextet(word, 6) } else { PAD });
        encoded.push(if group.len() > 2 { sextet(word, 0) } else { PAD });
    }
    encoded
}
369
370// ── Inline Annotations (§16.3) ──────────────────────────────────────
371
/// One piece of text that may carry inline semantic annotations (§16.3).
#[derive(Clone, Debug, PartialEq)]
pub enum Span {
    /// Unannotated text.
    Text(String),
    /// Text written as `@semantic{content}`; the content is itself a list
    /// of spans, so annotations can nest.
    Annotated {
        semantic: String,
        children: Vec<Span>,
    },
}
383
384// ── Sections (§17) ──────────────────────────────────────────────────
385
386/// A section within a SIF document.
387#[derive(Debug, Clone, PartialEq)]
388pub struct Section {
389    pub id: Option<String>,
390    pub directives: Vec<Directive>,
391    pub schema: Option<Schema>,
392    pub records: Vec<Record>,
393    pub blocks: Vec<Block>,
394    pub templates: Vec<Template>,
395}
396
397// ── Header (§6) ─────────────────────────────────────────────────────
398
/// The parsed header line of a SIF document (§6).
#[derive(Clone, Debug, PartialEq)]
pub struct Header {
    /// Version number taken from the header.
    pub version: u32,
    /// Header attributes keyed by name (unordered).
    pub attributes: HashMap<String, String>,
}
405
406// ── Document (§5) ───────────────────────────────────────────────────
407
408/// A complete parsed SIF document.
409#[derive(Debug, Clone, PartialEq)]
410pub struct Document {
411    pub header: Header,
412    pub sections: Vec<Section>,
413}
414
415impl Document {
416    /// Find a section by its `§name` identifier.
417    pub fn section_by_id(&self, id: &str) -> Option<&Section> {
418        self.sections.iter().find(|s| s.id.as_deref() == Some(id))
419    }
420}
421
422// ── Streaming Events ────────────────────────────────────────────────
423
424/// Events emitted by the streaming `Reader`.
425#[derive(Debug, Clone, PartialEq)]
426pub enum Event {
427    Header(Header),
428    SectionId(String),
429    SectionBreak,
430    Schema(Schema),
431    Record(Record),
432    Directive(Directive),
433    BlockStart {
434        block_type: BlockType,
435        attributes: Vec<(String, String)>,
436    },
437    BlockLine(String),
438    BlockEnd,
439    TemplateStart(String),
440    TemplateLine(String),
441    TemplateEnd,
442}