// forma_sif 0.1.0
//
// SIF serialization and deserialization for forma_core.
// Documentation
use crate::error::Error;
use forma_core::ser::*;
use std::io;

/// Serializer that writes SIF format to a writer.
///
/// Expects to serialize a sequence of structs. The first struct's field
/// names and types are used to generate the `#schema` line. Every struct,
/// including the first, is then written as a tab-delimited record.
pub struct Serializer<W: io::Write> {
    /// Destination that the header, schema line, and records are written to.
    writer: W,
    /// Set once the `#schema` line has been written; until then, struct field
    /// names are collected into `field_names`.
    schema_written: bool,
    /// Field names gathered from struct fields while the schema is unwritten.
    field_names: Vec<String>,
    /// Accumulates field values for the current record.
    current_record: Vec<String>,
}

impl<W: io::Write> Serializer<W> {
    pub fn new(writer: W) -> Self {
        Serializer {
            writer,
            schema_written: false,
            field_names: Vec::new(),
            current_record: Vec::new(),
        }
    }

    /// Write the SIF header line.
    pub fn write_header(&mut self) -> Result<(), Error> {
        self.writer.write_all(b"#!sif v1\n")?;
        Ok(())
    }

    fn flush_record(&mut self) -> Result<(), Error> {
        if self.current_record.is_empty() {
            return Ok(());
        }
        let line = self.current_record.join("\t");
        self.writer.write_all(line.as_bytes())?;
        self.writer.write_all(b"\n")?;
        self.current_record.clear();
        Ok(())
    }
}

/// Serialize a sequence of values to a SIF string.
pub fn to_string<T: Serialize>(value: &T) -> Result<String, Error> {
    let mut buf = Vec::new();
    to_writer(&mut buf, value)?;
    Ok(unsafe { String::from_utf8_unchecked(buf) })
}

/// Serialize a sequence of values as SIF into a writer.
///
/// Writes the SIF header line first, then drives `value`'s `Serialize`
/// implementation against a fresh [`Serializer`] that owns `writer`.
///
/// # Errors
///
/// Returns the first error raised while writing the header or serializing
/// `value`.
pub fn to_writer<W: io::Write, T: Serialize>(writer: W, value: &T) -> Result<(), Error> {
    let mut sif = Serializer::new(writer);
    sif.write_header()?;
    value.serialize(&mut sif)
}

// The top-level serializer handles sequences — Vec<T> calls serialize_seq.
impl<'a, W: io::Write> forma_core::ser::Serializer for &'a mut Serializer<W> {
    type Ok = ();
    type Error = Error;
    type SerializeSeq = SeqSerializer<'a, W>;
    type SerializeTuple = SeqSerializer<'a, W>;
    type SerializeTupleStruct = Impossible<(), Error>;
    type SerializeTupleVariant = Impossible<(), Error>;
    type SerializeMap = Impossible<(), Error>;
    type SerializeStruct = StructSerializer<'a, W>;
    type SerializeStructVariant = Impossible<(), Error>;

    fn serialize_bool(self, v: bool) -> Result<(), Error> {
        self.current_record.push(if v { "T".into() } else { "F".into() });
        Ok(())
    }

    fn serialize_i8(self, v: i8) -> Result<(), Error> { self.serialize_i64(v as i64) }
    fn serialize_i16(self, v: i16) -> Result<(), Error> { self.serialize_i64(v as i64) }
    fn serialize_i32(self, v: i32) -> Result<(), Error> { self.serialize_i64(v as i64) }
    fn serialize_i64(self, v: i64) -> Result<(), Error> {
        self.current_record.push(v.to_string());
        Ok(())
    }
    fn serialize_i128(self, v: i128) -> Result<(), Error> {
        self.current_record.push(v.to_string());
        Ok(())
    }

    fn serialize_u8(self, v: u8) -> Result<(), Error> { self.serialize_u64(v as u64) }
    fn serialize_u16(self, v: u16) -> Result<(), Error> { self.serialize_u64(v as u64) }
    fn serialize_u32(self, v: u32) -> Result<(), Error> { self.serialize_u64(v as u64) }
    fn serialize_u64(self, v: u64) -> Result<(), Error> {
        self.current_record.push(v.to_string());
        Ok(())
    }
    fn serialize_u128(self, v: u128) -> Result<(), Error> {
        self.current_record.push(v.to_string());
        Ok(())
    }

    fn serialize_f32(self, v: f32) -> Result<(), Error> { self.serialize_f64(v as f64) }
    fn serialize_f64(self, v: f64) -> Result<(), Error> {
        if v.fract() == 0.0 && !v.is_nan() && !v.is_infinite() {
            self.current_record.push(format!("{:.1}", v));
        } else {
            self.current_record.push(v.to_string());
        }
        Ok(())
    }

    fn serialize_char(self, v: char) -> Result<(), Error> {
        self.serialize_str(&v.to_string())
    }

    fn serialize_str(self, v: &str) -> Result<(), Error> {
        // Quote if the string contains tab, newline, starts with #, etc.
        let needs_quoting = v.is_empty()
            || v.contains('\t')
            || v.contains('\n')
            || v.starts_with('#')
            || v.starts_with('"')
            || v.starts_with(' ')
            || v.ends_with(' ')
            || v == "_"
            || v == "T"
            || v == "F"
            || v == "---";
        if needs_quoting {
            let escaped = v
                .replace('\\', "\\\\")
                .replace('"', "\\\"")
                .replace('\n', "\\n")
                .replace('\t', "\\t");
            self.current_record.push(format!("\"{}\"", escaped));
        } else {
            self.current_record.push(v.to_string());
        }
        Ok(())
    }

    fn serialize_bytes(self, v: &[u8]) -> Result<(), Error> {
        use std::fmt::Write;
        let mut s = String::new();
        for (i, byte) in v.iter().enumerate() {
            if i > 0 { s.push(','); }
            write!(s, "{}", byte).unwrap();
        }
        self.current_record.push(format!("[{}]", s));
        Ok(())
    }

    fn serialize_none(self) -> Result<(), Error> {
        self.current_record.push("_".into());
        Ok(())
    }

    fn serialize_some<T: Serialize + ?Sized>(self, value: &T) -> Result<(), Error> {
        value.serialize(self)
    }

    fn serialize_unit(self) -> Result<(), Error> {
        self.current_record.push("_".into());
        Ok(())
    }

    fn serialize_unit_struct(self, _name: &'static str) -> Result<(), Error> {
        self.serialize_unit()
    }

    fn serialize_unit_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        variant: &'static str,
    ) -> Result<(), Error> {
        self.serialize_str(variant)
    }

    fn serialize_newtype_struct<T: Serialize + ?Sized>(
        self,
        _name: &'static str,
        value: &T,
    ) -> Result<(), Error> {
        value.serialize(self)
    }

    fn serialize_newtype_variant<T: Serialize + ?Sized>(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _value: &T,
    ) -> Result<(), Error> {
        Err(Error::Message("newtype variants are not supported in SIF".into()))
    }

    fn serialize_seq(self, _len: Option<usize>) -> Result<SeqSerializer<'a, W>, Error> {
        Ok(SeqSerializer { ser: self })
    }

    fn serialize_tuple(self, _len: usize) -> Result<SeqSerializer<'a, W>, Error> {
        self.serialize_seq(None)
    }

    fn serialize_tuple_struct(
        self,
        _name: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeTupleStruct, Error> {
        Err(Error::Message("tuple structs are not supported in SIF".into()))
    }

    fn serialize_tuple_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeTupleVariant, Error> {
        Err(Error::Message("tuple variants are not supported in SIF".into()))
    }

    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Error> {
        Err(Error::Message("maps are not directly supported at top level in SIF".into()))
    }

    fn serialize_struct(
        self,
        _name: &'static str,
        _len: usize,
    ) -> Result<StructSerializer<'a, W>, Error> {
        Ok(StructSerializer { ser: self })
    }

    fn serialize_struct_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeStructVariant, Error> {
        Err(Error::Message("struct variants are not supported in SIF".into()))
    }

    fn is_human_readable(&self) -> bool {
        true
    }
}

// ── SeqSerializer ───────────────────────────────────────────────────
// Handles Vec<T> — each element is a record.

/// Sequence/tuple state returned by `serialize_seq` and `serialize_tuple`.
///
/// Holds the mutable borrow of the parent [`Serializer`]; every element is
/// serialized straight into it.
pub struct SeqSerializer<'a, W: io::Write> {
    /// Parent serializer that each element is forwarded to.
    ser: &'a mut Serializer<W>,
}

impl<W: io::Write> SerializeSeq for SeqSerializer<'_, W> {
    type Ok = ();
    type Error = Error;

    fn serialize_element<T: Serialize + ?Sized>(&mut self, value: &T) -> Result<(), Error> {
        value.serialize(&mut *self.ser)
    }

    fn end(self) -> Result<(), Error> {
        Ok(())
    }
}

impl<W: io::Write> SerializeTuple for SeqSerializer<'_, W> {
    type Ok = ();
    type Error = Error;

    fn serialize_element<T: Serialize + ?Sized>(&mut self, value: &T) -> Result<(), Error> {
        SerializeSeq::serialize_element(self, value)
    }

    fn end(self) -> Result<(), Error> {
        SerializeSeq::end(self)
    }
}

// ── StructSerializer ────────────────────────────────────────────────
// Handles individual struct → record conversion.

/// Struct state returned by `serialize_struct`.
///
/// Holds the mutable borrow of the parent [`Serializer`]; field values are
/// serialized into it and the record is flushed when the struct ends.
pub struct StructSerializer<'a, W: io::Write> {
    /// Parent serializer that receives field names and values.
    ser: &'a mut Serializer<W>,
}

impl<W: io::Write> SerializeStruct for StructSerializer<'_, W> {
    type Ok = ();
    type Error = Error;

    /// Serialize one field into the pending record.
    ///
    /// While the schema line has not been written yet, the field name is
    /// also remembered so that `end` can build the schema from it.
    fn serialize_field<T: Serialize + ?Sized>(
        &mut self,
        key: &'static str,
        value: &T,
    ) -> Result<(), Error> {
        let schema_pending = !self.ser.schema_written;
        if schema_pending {
            self.ser.field_names.push(key.to_owned());
        }
        value.serialize(&mut *self.ser)
    }

    /// Finish the struct: write the `#schema` line if it is still pending,
    /// then flush the accumulated values as one record.
    fn end(self) -> Result<(), Error> {
        let ser = self.ser;
        if !ser.schema_written {
            // Pair each collected name with the type inferred from the
            // matching formatted value: `name:type` entries, space separated.
            let mut schema_line = String::from("#schema ");
            let columns = ser.field_names.iter().zip(ser.current_record.iter());
            for (idx, (name, val)) in columns.enumerate() {
                if idx > 0 {
                    schema_line.push(' ');
                }
                schema_line.push_str(name);
                schema_line.push(':');
                schema_line.push_str(infer_sif_type(val));
            }
            schema_line.push('\n');
            ser.writer.write_all(schema_line.as_bytes())?;
            ser.schema_written = true;
        }
        ser.flush_record()
    }
}

/// Infer a SIF type string from a serialized value.
///
/// `val` is the already-formatted cell text. Checks run from most to least
/// specific: the `_` placeholder and quoted text are `str`, `T`/`F` are
/// `bool`, a leading `[` is `str[]`, a leading `{` is `map`; otherwise the
/// text is tried as an unsigned integer, a signed integer and a float, and
/// falls back to `str`.
///
/// Integers are parsed at 128-bit width because `serialize_i128` and
/// `serialize_u128` emit plain decimal digits; a 64-bit parse would let such
/// values fall through to the `float` check.
fn infer_sif_type(val: &str) -> &'static str {
    // Exact reserved tokens first.
    match val {
        "_" => return "str",
        "T" | "F" => return "bool",
        _ => {}
    }

    // Then classification by leading delimiter.
    match val.as_bytes().first() {
        Some(b'[') => return "str[]",
        Some(b'{') => return "map",
        Some(b'"') => return "str",
        _ => {}
    }

    // Integer forms never contain a decimal point; unsigned is preferred
    // whenever there is no leading minus sign.
    if !val.contains('.') {
        if !val.starts_with('-') && val.parse::<u128>().is_ok() {
            return "uint";
        }
        if val.parse::<i128>().is_ok() {
            return "int";
        }
    }

    if val.parse::<f64>().is_ok() {
        return "float";
    }
    "str"
}