sochdb_core/
soch.rs

1// Copyright 2025 Sushanth (https://github.com/sushanthpy)
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! TOON (Tabular Object-Oriented Notation) - Native Data Format for SochDB
16//!
17//! TOON is a compact, schema-aware data format optimized for LLMs and databases.
18//! It's the native format for SochDB, like JSON is for MongoDB.
19//!
20//! Format: `name[count]{fields}:\nrow1\nrow2\n...`
21//!
22//! Example:
23//! ```text
24//! users[3]{id,name,email}:
25//! 1,Alice,alice@example.com
26//! 2,Bob,bob@example.com
27//! 3,Charlie,charlie@example.com
28//! ```
29
30use serde::{Deserialize, Serialize};
31use std::collections::HashMap;
32use std::fmt;
33
34/// TOON Value types
35#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
36pub enum SochValue {
37    Null,
38    Bool(bool),
39    Int(i64),
40    UInt(u64),
41    Float(f64),
42    Text(String),
43    Binary(Vec<u8>),
44    Array(Vec<SochValue>),
45    Object(HashMap<String, SochValue>),
46    /// Reference to another table row: ref(table_name, id)
47    Ref {
48        table: String,
49        id: u64,
50    },
51}
52
53impl SochValue {
54    pub fn is_null(&self) -> bool {
55        matches!(self, SochValue::Null)
56    }
57
58    pub fn as_int(&self) -> Option<i64> {
59        match self {
60            SochValue::Int(v) => Some(*v),
61            SochValue::UInt(v) => Some(*v as i64),
62            _ => None,
63        }
64    }
65
66    pub fn as_uint(&self) -> Option<u64> {
67        match self {
68            SochValue::UInt(v) => Some(*v),
69            SochValue::Int(v) if *v >= 0 => Some(*v as u64),
70            _ => None,
71        }
72    }
73
74    pub fn as_float(&self) -> Option<f64> {
75        match self {
76            SochValue::Float(v) => Some(*v),
77            SochValue::Int(v) => Some(*v as f64),
78            SochValue::UInt(v) => Some(*v as f64),
79            _ => None,
80        }
81    }
82
83    pub fn as_text(&self) -> Option<&str> {
84        match self {
85            SochValue::Text(s) => Some(s),
86            _ => None,
87        }
88    }
89
90    pub fn as_bool(&self) -> Option<bool> {
91        match self {
92            SochValue::Bool(b) => Some(*b),
93            _ => None,
94        }
95    }
96}
97
/// Decides whether a text value has to be wrapped in double quotes when it is
/// written out.
///
/// Returns `true` when the bare text would be ambiguous: it is empty, carries
/// leading or trailing whitespace, reads like a keyword or a number, starts
/// with `-`, or contains a structural character.
fn needs_quoting(s: &str) -> bool {
    if s.is_empty() {
        return true;
    }

    // Any leading or trailing whitespace forces quoting. `str::trim` is used
    // as the test so that every Unicode whitespace character counts, not only
    // U+0020.
    if s.trim().len() != s.len() {
        return true;
    }

    if matches!(s, "true" | "false" | "null") {
        return true;
    }

    // Number-like text, and anything starting with '-' (which covers a lone "-").
    if s.parse::<f64>().is_ok() || s.starts_with('-') {
        return true;
    }

    // Leading zero followed by another digit (e.g. "05"), unless the text
    // contains a '.'.
    let mut leading = s.bytes();
    let zero_prefixed = leading.next() == Some(b'0')
        && leading.next().map_or(false, |b| b.is_ascii_digit());
    if zero_prefixed && !s.contains('.') {
        return true;
    }

    // Spec §7.3: :, ", \, [, ], {, }, newline, return, tab, delimiter (comma)
    s.chars().any(|c| {
        matches!(
            c,
            ':' | '"' | '\\' | '[' | ']' | '{' | '}' | '\n' | '\r' | '\t' | ','
        )
    })
}
115
116impl fmt::Display for SochValue {
117    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
118        match self {
119            SochValue::Null => write!(f, "null"),
120            SochValue::Bool(b) => write!(f, "{}", b),
121            SochValue::Int(i) => write!(f, "{}", i),
122            SochValue::UInt(u) => write!(f, "{}", u),
123            SochValue::Float(fl) => write!(f, "{}", fl),
124            SochValue::Text(s) => {
125                if needs_quoting(s) {
126                    write!(f, "\"")?;
127                    for c in s.chars() {
128                        match c {
129                            '"' => write!(f, "\\\"")?,
130                            '\\' => write!(f, "\\\\")?,
131                            '\n' => write!(f, "\\n")?,
132                            '\r' => write!(f, "\\r")?,
133                            '\t' => write!(f, "\\t")?,
134                            c => write!(f, "{}", c)?,
135                        }
136                    }
137                    write!(f, "\"")
138                } else {
139                    write!(f, "{}", s)
140                }
141            }
142            SochValue::Binary(b) => write!(f, "0x{}", hex::encode(b)),
143            SochValue::Array(arr) => {
144                write!(f, "[")?;
145                for (i, v) in arr.iter().enumerate() {
146                    if i > 0 {
147                        write!(f, ";")?;
148                    }
149                    write!(f, "{}", v)?;
150                }
151                write!(f, "]")
152            }
153            SochValue::Object(obj) => {
154                write!(f, "{{")?;
155                for (i, (k, v)) in obj.iter().enumerate() {
156                    if i > 0 {
157                        write!(f, ";")?;
158                    }
159                    write!(f, "{}:{}", k, v)?;
160                }
161                write!(f, "}}")
162            }
163            SochValue::Ref { table, id } => write!(f, "@{}:{}", table, id),
164        }
165    }
166}
167
168/// Field type in a TOON schema
169#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
170pub enum SochType {
171    Null,
172    Bool,
173    Int,
174    UInt,
175    Float,
176    Text,
177    Binary,
178    Array(Box<SochType>),
179    Object(Vec<(String, SochType)>),
180    Ref(String), // Reference to table name
181    /// Union of types (for nullable fields)
182    Optional(Box<SochType>),
183}
184
185impl SochType {
186    /// Check if a value matches this type
187    pub fn matches(&self, value: &SochValue) -> bool {
188        match (self, value) {
189            (SochType::Null, SochValue::Null) => true,
190            (SochType::Bool, SochValue::Bool(_)) => true,
191            (SochType::Int, SochValue::Int(_)) => true,
192            (SochType::UInt, SochValue::UInt(_)) => true,
193            (SochType::Float, SochValue::Float(_)) => true,
194            (SochType::Text, SochValue::Text(_)) => true,
195            (SochType::Binary, SochValue::Binary(_)) => true,
196            (SochType::Array(inner), SochValue::Array(arr)) => arr.iter().all(|v| inner.matches(v)),
197            (SochType::Ref(table), SochValue::Ref { table: t, .. }) => table == t,
198            (SochType::Optional(inner), value) => value.is_null() || inner.matches(value),
199            _ => false,
200        }
201    }
202
203    /// Parse type from string notation
204    pub fn parse(s: &str) -> Option<Self> {
205        let s = s.trim();
206        match s {
207            "null" => Some(SochType::Null),
208            "bool" => Some(SochType::Bool),
209            "int" | "i64" => Some(SochType::Int),
210            "uint" | "u64" => Some(SochType::UInt),
211            "float" | "f64" => Some(SochType::Float),
212            "text" | "string" => Some(SochType::Text),
213            "binary" | "bytes" => Some(SochType::Binary),
214            _ if s.starts_with("ref(") && s.ends_with(')') => {
215                let table = &s[4..s.len() - 1];
216                Some(SochType::Ref(table.to_string()))
217            }
218            _ if s.starts_with("array(") && s.ends_with(')') => {
219                let inner = &s[6..s.len() - 1];
220                SochType::parse(inner).map(|t| SochType::Array(Box::new(t)))
221            }
222            _ if s.ends_with('?') => {
223                let inner = &s[..s.len() - 1];
224                SochType::parse(inner).map(|t| SochType::Optional(Box::new(t)))
225            }
226            _ => None,
227        }
228    }
229}
230
231impl fmt::Display for SochType {
232    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
233        match self {
234            SochType::Null => write!(f, "null"),
235            SochType::Bool => write!(f, "bool"),
236            SochType::Int => write!(f, "int"),
237            SochType::UInt => write!(f, "uint"),
238            SochType::Float => write!(f, "float"),
239            SochType::Text => write!(f, "text"),
240            SochType::Binary => write!(f, "binary"),
241            SochType::Array(inner) => write!(f, "array({})", inner),
242            SochType::Object(fields) => {
243                write!(f, "{{")?;
244                for (i, (name, ty)) in fields.iter().enumerate() {
245                    if i > 0 {
246                        write!(f, ",")?;
247                    }
248                    write!(f, "{}:{}", name, ty)?;
249                }
250                write!(f, "}}")
251            }
252            SochType::Ref(table) => write!(f, "ref({})", table),
253            SochType::Optional(inner) => write!(f, "{}?", inner),
254        }
255    }
256}
257
258/// A TOON schema definition
259#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
260pub struct SochSchema {
261    /// Schema name (table name)
262    pub name: String,
263    /// Field definitions
264    pub fields: Vec<SochField>,
265    /// Primary key field name
266    pub primary_key: Option<String>,
267    /// Indexes on this schema
268    pub indexes: Vec<SochIndex>,
269}
270
271/// A field in a TOON schema
272#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
273pub struct SochField {
274    pub name: String,
275    pub field_type: SochType,
276    pub nullable: bool,
277    pub default: Option<String>, // Default value as TOON string
278}
279
280/// An index definition
281#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
282pub struct SochIndex {
283    pub name: String,
284    pub fields: Vec<String>,
285    pub unique: bool,
286}
287
288impl SochSchema {
289    pub fn new(name: impl Into<String>) -> Self {
290        Self {
291            name: name.into(),
292            fields: Vec::new(),
293            primary_key: None,
294            indexes: Vec::new(),
295        }
296    }
297
298    pub fn field(mut self, name: impl Into<String>, field_type: SochType) -> Self {
299        self.fields.push(SochField {
300            name: name.into(),
301            field_type,
302            nullable: false,
303            default: None,
304        });
305        self
306    }
307
308    pub fn nullable_field(mut self, name: impl Into<String>, field_type: SochType) -> Self {
309        self.fields.push(SochField {
310            name: name.into(),
311            field_type,
312            nullable: true,
313            default: None,
314        });
315        self
316    }
317
318    pub fn primary_key(mut self, field: impl Into<String>) -> Self {
319        self.primary_key = Some(field.into());
320        self
321    }
322
323    pub fn index(mut self, name: impl Into<String>, fields: Vec<String>, unique: bool) -> Self {
324        self.indexes.push(SochIndex {
325            name: name.into(),
326            fields,
327            unique,
328        });
329        self
330    }
331
332    /// Get field names for header
333    pub fn field_names(&self) -> Vec<&str> {
334        self.fields.iter().map(|f| f.name.as_str()).collect()
335    }
336
337    /// Format schema header: name[0]{field1,field2,...}:
338    pub fn format_header(&self) -> String {
339        let fields: Vec<&str> = self.fields.iter().map(|f| f.name.as_str()).collect();
340        format!("{}[0]{{{}}}:", self.name, fields.join(","))
341    }
342}
343
344/// A TOON row - values for a single record
345#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
346pub struct SochRow {
347    pub values: Vec<SochValue>,
348}
349
350impl SochRow {
351    pub fn new(values: Vec<SochValue>) -> Self {
352        Self { values }
353    }
354
355    /// Get value by index
356    pub fn get(&self, index: usize) -> Option<&SochValue> {
357        self.values.get(index)
358    }
359
360    /// Format row as TOON line
361    pub fn format(&self) -> String {
362        self.values
363            .iter()
364            .map(|v| v.to_string())
365            .collect::<Vec<_>>()
366            .join(",")
367    }
368
369    /// Parse row from TOON line
370    pub fn parse(line: &str, schema: &SochSchema) -> Result<Self, String> {
371        let mut values = Vec::with_capacity(schema.fields.len());
372        let mut chars = line.chars().peekable();
373        let mut current = String::new();
374        let mut in_quotes = false;
375        let mut field_idx = 0;
376
377        while let Some(ch) = chars.next() {
378            match ch {
379                '"' if !in_quotes => {
380                    in_quotes = true;
381                }
382                '"' if in_quotes => {
383                    if chars.peek() == Some(&'"') {
384                        chars.next();
385                        current.push('"');
386                    } else {
387                        in_quotes = false;
388                    }
389                }
390                ',' if !in_quotes => {
391                    let value = Self::parse_value(&current, field_idx, schema)?;
392                    values.push(value);
393                    current.clear();
394                    field_idx += 1;
395                }
396                _ => {
397                    current.push(ch);
398                }
399            }
400        }
401
402        // Last field
403        if !current.is_empty() || field_idx < schema.fields.len() {
404            let value = Self::parse_value(&current, field_idx, schema)?;
405            values.push(value);
406        }
407
408        Ok(Self { values })
409    }
410
411    fn parse_value(s: &str, field_idx: usize, schema: &SochSchema) -> Result<SochValue, String> {
412        let s = s.trim();
413
414        if s.is_empty() || s == "null" {
415            return Ok(SochValue::Null);
416        }
417
418        let field = schema
419            .fields
420            .get(field_idx)
421            .ok_or_else(|| format!("Field index {} out of bounds", field_idx))?;
422
423        match &field.field_type {
424            SochType::Bool => match s.to_lowercase().as_str() {
425                "true" | "1" | "yes" => Ok(SochValue::Bool(true)),
426                "false" | "0" | "no" => Ok(SochValue::Bool(false)),
427                _ => Err(format!("Invalid bool: {}", s)),
428            },
429            SochType::Int => s
430                .parse::<i64>()
431                .map(SochValue::Int)
432                .map_err(|e| format!("Invalid int: {}", e)),
433            SochType::UInt => s
434                .parse::<u64>()
435                .map(SochValue::UInt)
436                .map_err(|e| format!("Invalid uint: {}", e)),
437            SochType::Float => s
438                .parse::<f64>()
439                .map(SochValue::Float)
440                .map_err(|e| format!("Invalid float: {}", e)),
441            SochType::Text => Ok(SochValue::Text(s.to_string())),
442            SochType::Binary => {
443                if let Some(hex_str) = s.strip_prefix("0x") {
444                    hex::decode(hex_str)
445                        .map(SochValue::Binary)
446                        .map_err(|e| format!("Invalid hex: {}", e))
447                } else {
448                    Err("Binary must start with 0x".to_string())
449                }
450            }
451            SochType::Ref(table) => {
452                // Format: @table:id or just id
453                if let Some(ref_str) = s.strip_prefix('@') {
454                    let parts: Vec<&str> = ref_str.split(':').collect();
455                    if parts.len() == 2 {
456                        let id = parts[1]
457                            .parse::<u64>()
458                            .map_err(|e| format!("Invalid ref id: {}", e))?;
459                        Ok(SochValue::Ref {
460                            table: parts[0].to_string(),
461                            id,
462                        })
463                    } else {
464                        Err(format!("Invalid ref format: {}", s))
465                    }
466                } else {
467                    let id = s
468                        .parse::<u64>()
469                        .map_err(|e| format!("Invalid ref id: {}", e))?;
470                    Ok(SochValue::Ref {
471                        table: table.clone(),
472                        id,
473                    })
474                }
475            }
476            SochType::Optional(inner) => {
477                // Try to parse as inner type
478                let temp_field = SochField {
479                    name: field.name.clone(),
480                    field_type: (**inner).clone(),
481                    nullable: true,
482                    default: None,
483                };
484                let temp_schema = SochSchema {
485                    name: schema.name.clone(),
486                    fields: vec![temp_field],
487                    primary_key: None,
488                    indexes: vec![],
489                };
490                Self::parse_value(s, 0, &temp_schema)
491            }
492            _ => Ok(SochValue::Text(s.to_string())),
493        }
494    }
495}
496
497/// A complete TOON table (header + rows)
498#[derive(Debug, Clone, Serialize, Deserialize)]
499pub struct SochTable {
500    pub schema: SochSchema,
501    pub rows: Vec<SochRow>,
502}
503
504impl SochTable {
505    pub fn new(schema: SochSchema) -> Self {
506        Self {
507            schema,
508            rows: Vec::new(),
509        }
510    }
511
512    pub fn with_rows(schema: SochSchema, rows: Vec<SochRow>) -> Self {
513        Self { schema, rows }
514    }
515
516    pub fn push(&mut self, row: SochRow) {
517        self.rows.push(row);
518    }
519
520    pub fn len(&self) -> usize {
521        self.rows.len()
522    }
523
524    pub fn is_empty(&self) -> bool {
525        self.rows.is_empty()
526    }
527
528    /// Format as TOON string
529    pub fn format(&self) -> String {
530        let fields: Vec<&str> = self.schema.fields.iter().map(|f| f.name.as_str()).collect();
531        let header = format!(
532            "{}[{}]{{{}}}:",
533            self.schema.name,
534            self.rows.len(),
535            fields.join(",")
536        );
537
538        let mut output = header;
539        for row in &self.rows {
540            output.push('\n');
541            output.push_str(&row.format());
542        }
543        output
544    }
545
546    /// Parse TOON string to table
547    pub fn parse(input: &str) -> Result<Self, String> {
548        let mut lines = input.lines();
549
550        // Parse header: name[count]{field1,field2,...}:
551        let header = lines.next().ok_or("Empty input")?;
552        let (schema, _count) = Self::parse_header(header)?;
553
554        // Parse rows
555        let mut rows = Vec::new();
556        for line in lines {
557            if line.trim().is_empty() {
558                continue;
559            }
560            let row = SochRow::parse(line, &schema)?;
561            rows.push(row);
562        }
563
564        Ok(Self { schema, rows })
565    }
566
567    fn parse_header(header: &str) -> Result<(SochSchema, usize), String> {
568        // name[count]{field1,field2,...}:
569        let header = header.trim_end_matches(':');
570
571        let bracket_start = header.find('[').ok_or("Missing [")?;
572        let bracket_end = header.find(']').ok_or("Missing ]")?;
573        let brace_start = header.find('{').ok_or("Missing {")?;
574        let brace_end = header.find('}').ok_or("Missing }")?;
575
576        let name = &header[..bracket_start];
577        let count_str = &header[bracket_start + 1..bracket_end];
578        let fields_str = &header[brace_start + 1..brace_end];
579
580        let count = count_str
581            .parse::<usize>()
582            .map_err(|e| format!("Invalid count: {}", e))?;
583
584        let field_names: Vec<&str> = fields_str.split(',').map(|s| s.trim()).collect();
585
586        let mut schema = SochSchema::new(name);
587        for field_name in field_names {
588            // Check if type is specified: field_name:type
589            if let Some(colon_pos) = field_name.find(':') {
590                let fname = &field_name[..colon_pos];
591                let ftype_str = &field_name[colon_pos + 1..];
592                let ftype = SochType::parse(ftype_str).unwrap_or(SochType::Text);
593                schema.fields.push(SochField {
594                    name: fname.to_string(),
595                    field_type: ftype,
596                    nullable: false,
597                    default: None,
598                });
599            } else {
600                // Default to text type
601                schema.fields.push(SochField {
602                    name: field_name.to_string(),
603                    field_type: SochType::Text,
604                    nullable: false,
605                    default: None,
606                });
607            }
608        }
609
610        Ok((schema, count))
611    }
612}
613
614impl fmt::Display for SochTable {
615    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
616        write!(f, "{}", self.format())
617    }
618}
619
/// Read access to columnar data, addressed by column and row index.
///
/// Cell values are written straight into a caller-supplied sink instead of
/// being returned as owned strings.
pub trait ColumnAccess {
    /// Number of rows available.
    fn row_count(&self) -> usize;
    /// Number of columns in every row.
    fn col_count(&self) -> usize;
    /// Column names, in column order.
    fn field_names(&self) -> Vec<&str>;
    /// Writes the text of the cell at (`col_idx`, `row_idx`) into `f`.
    fn write_value(
        &self,
        col_idx: usize,
        row_idx: usize,
        f: &mut dyn std::fmt::Write,
    ) -> std::fmt::Result;
}

/// Iterator that renders a [`ColumnAccess`] source as TOON text, one line
/// per item: the header first, then each row.
pub struct SochCursor<'a, C: ColumnAccess> {
    access: &'a C,
    current_row: usize,
    header_emitted: bool,
    schema_name: String,
}

impl<'a, C: ColumnAccess> SochCursor<'a, C> {
    /// Creates a cursor positioned before the header line.
    pub fn new(access: &'a C, schema_name: String) -> Self {
        SochCursor {
            access,
            schema_name,
            header_emitted: false,
            current_row: 0,
        }
    }
}

impl<'a, C: ColumnAccess> Iterator for SochCursor<'a, C> {
    type Item = String;

    /// Yields `name[rows]{fields}:` on the first call, then one
    /// comma-separated line per row, and `None` once the rows are exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        if !self.header_emitted {
            self.header_emitted = true;
            let fields = self.access.field_names().join(",");
            let header = format!(
                "{}[{}]{{{}}}:",
                self.schema_name,
                self.access.row_count(),
                fields
            );
            return Some(header);
        }

        let row_idx = self.current_row;
        if row_idx >= self.access.row_count() {
            return None;
        }
        self.current_row = row_idx + 1;

        let mut line = String::new();
        for col_idx in 0..self.access.col_count() {
            if col_idx != 0 {
                line.push(',');
            }
            // A failing `write_value` is deliberately ignored: the item type
            // is a plain `String` and has no way to carry the error.
            let _ = self.access.write_value(col_idx, row_idx, &mut line);
        }
        Some(line)
    }
}
686
#[cfg(test)]
mod tests {
    use super::*;

    /// The three-column `users` schema shared by the schema and format tests.
    fn users_schema() -> SochSchema {
        SochSchema::new("users")
            .field("id", SochType::UInt)
            .field("name", SochType::Text)
            .field("email", SochType::Text)
    }

    /// Scalars print bare; text is quoted only when it needs to be.
    #[test]
    fn test_soch_value_display() {
        let cases = [
            (SochValue::Int(42), "42"),
            (SochValue::Text("hello".into()), "hello"),
            (SochValue::Text("hello, world".into()), "\"hello, world\""),
            (SochValue::Bool(true), "true"),
            (SochValue::Null, "null"),
        ];
        for (value, expected) in cases.iter() {
            assert_eq!(value.to_string(), *expected);
        }
    }

    /// The builder records name, fields and primary key.
    #[test]
    fn test_soch_schema() {
        let schema = users_schema().primary_key("id");

        assert_eq!(schema.name, "users");
        assert_eq!(schema.fields.len(), 3);
        assert_eq!(schema.primary_key, Some("id".to_string()));
    }

    /// Formatting emits the counted header and one line per row.
    #[test]
    fn test_soch_table_format() {
        let mut table = SochTable::new(users_schema());
        let people = [
            (1u64, "Alice", "alice@example.com"),
            (2u64, "Bob", "bob@example.com"),
        ];
        for (id, name, email) in people.iter() {
            table.push(SochRow::new(vec![
                SochValue::UInt(*id),
                SochValue::Text((*name).into()),
                SochValue::Text((*email).into()),
            ]));
        }

        let formatted = table.format();
        let expected_lines = [
            "users[2]{id,name,email}:",
            "1,Alice,alice@example.com",
            "2,Bob,bob@example.com",
        ];
        for line in expected_lines.iter() {
            assert!(formatted.contains(line));
        }
    }

    /// Parsing recovers the table name and every row.
    #[test]
    fn test_soch_table_parse() {
        let input = r#"users[2]{id,name,email}:
1,Alice,alice@example.com
2,Bob,bob@example.com"#;

        let table = SochTable::parse(input).unwrap();
        assert_eq!(table.schema.name, "users");
        assert_eq!(table.rows.len(), 2);
    }

    /// Type notation covers scalars, references and the optional marker.
    #[test]
    fn test_soch_type_parse() {
        let cases = [
            ("int", SochType::Int),
            ("text", SochType::Text),
            ("ref(users)", SochType::Ref("users".into())),
            ("int?", SochType::Optional(Box::new(SochType::Int))),
        ];
        for (notation, expected) in cases.iter() {
            assert_eq!(SochType::parse(notation), Some(expected.clone()));
        }
    }
}