preserves 4.996.0

Implementation of the Preserves serialization format via serde.
Documentation
//! Implementation of [Writer] for the text syntax.

use crate::hex::HexFormatter;
use crate::value::suspendable::Suspendable;
use crate::value::writer::CompoundWriter;
use crate::value::DomainEncode;
use crate::value::IOValue;
use crate::value::IOValueDomainCodec;
use crate::value::NestedValue;
use crate::value::Writer;

use lazy_static::lazy_static;

use num::bigint::BigInt;

use std::io;

use super::super::boundary as B;

/// Chooses how [TextWriter] places commas between the subterms it prints.
#[derive(Debug, Clone, Copy)]
pub enum CommaStyle {
    /// Print no commas at all. (Preserves text syntax treats commas as whitespace (!).)
    None,
    /// Print a comma between adjacent subterms, but not after the last one.
    Separating,
    /// Print a comma after every subterm, including the last one.
    Terminating,
}

/// The (optionally pretty-printing) text syntax Preserves writer.
///
/// Build one with [`TextWriter::new`]; the public fields can then be adjusted to
/// control comma placement, pretty-printing and space escaping.
pub struct TextWriter<W: io::Write> {
    /// The underlying byte sink, wrapped so that `suspend`/`resume` can pass it to a
    /// nested writer and take it back afterwards.
    w: Suspendable<W>,
    /// Selects a comma style to use when printing.
    pub comma_style: CommaStyle,
    /// Specifies indentation to use when pretty-printing; 0 disables pretty-printing.
    pub indentation: usize,
    /// An aid to use of printed terms in shell scripts: set `true` to escape spaces embedded
    /// in strings and symbols.
    pub escape_spaces: bool,
    /// Text emitted at each line break while pretty-printing: starts out as `"\n"` and
    /// gains `indentation` spaces for every `add_indent` call.
    indent: String,
}

impl std::default::Default for CommaStyle {
    fn default() -> Self {
        CommaStyle::Separating
    }
}

impl TextWriter<&mut Vec<u8>> {
    /// Renders `v` as text syntax and appends the result to the formatter `f`.
    ///
    /// When [`f.alternate()`][std::fmt::Formatter::alternate] is set, the value is
    /// pretty-printed with an indentation of 4; otherwise it is printed without
    /// pretty-printing. A failure to append to `f` is reported as an [io::Error].
    pub fn fmt_value<N: NestedValue, Enc: DomainEncode<N::Embedded>>(
        f: &mut std::fmt::Formatter<'_>,
        enc: &mut Enc,
        v: &crate::value::Value<N>,
    ) -> io::Result<()> {
        let mut bytes: Vec<u8> = Vec::new();
        {
            let mut writer = TextWriter::new(&mut bytes);
            if f.alternate() {
                writer.indentation = 4;
            }
            writer.write_value(enc, v)?;
        }
        let text = std::str::from_utf8(&bytes).expect("valid UTF-8 from TextWriter");
        match f.write_str(text) {
            Ok(()) => Ok(()),
            Err(_) => Err(io::Error::new(
                io::ErrorKind::Other,
                "could not append to Formatter",
            )),
        }
    }

    /// Encode `v` to a [String], using `enc` for embedded values.
    pub fn encode<N: NestedValue, Enc: DomainEncode<N::Embedded>>(
        enc: &mut Enc,
        v: &N,
    ) -> io::Result<String> {
        let mut bytes: Vec<u8> = Vec::new();
        {
            let mut writer = TextWriter::new(&mut bytes);
            writer.write(enc, v)?;
        }
        let text = String::from_utf8(bytes).expect("valid UTF-8 from TextWriter");
        Ok(text)
    }

    /// Encode the [IOValue] `v` to a [String], using [IOValueDomainCodec] for
    /// embedded values.
    pub fn encode_iovalue(v: &IOValue) -> io::Result<String> {
        let mut codec = IOValueDomainCodec;
        Self::encode(&mut codec, v)
    }
}

impl<W: io::Write> TextWriter<W> {
    /// Construct a writer from the given byte sink `w`.
    ///
    /// The writer starts with the default [CommaStyle], pretty-printing disabled
    /// (`indentation` of 0) and space-escaping turned off.
    pub fn new(w: W) -> Self {
        TextWriter {
            w: Suspendable::new(w),
            comma_style: CommaStyle::default(),
            indentation: 0,
            escape_spaces: false,
            // The line prefix always begins with the line break itself.
            indent: "\n".to_owned(),
        }
    }

    /// Update selected comma-printing style.
    ///
    /// Consumes the writer and returns it, so calls can be chained after [`new`][Self::new].
    pub fn set_comma_style(mut self, v: CommaStyle) -> Self {
        self.comma_style = v;
        self
    }

    /// Update selected space-escaping style.
    ///
    /// Consumes the writer and returns it, so calls can be chained after [`new`][Self::new].
    pub fn set_escape_spaces(mut self, v: bool) -> Self {
        self.escape_spaces = v;
        self
    }

    /// Returns a writer built from the suspended byte sink, carrying a copy of this
    /// writer's settings and of its current indent prefix.
    #[doc(hidden)]
    pub fn suspend(&mut self) -> Self {
        TextWriter {
            w: self.w.suspend(),
            indent: self.indent.clone(),
            ..*self
        }
    }

    /// Takes the byte sink back from `other`, a writer previously obtained from
    /// [`suspend`][Self::suspend]. Only the sink is taken; `other`'s settings and
    /// indent prefix are discarded.
    #[doc(hidden)]
    pub fn resume(&mut self, other: Self) {
        self.w.resume(other.w)
    }

    /// Writes `c` as a backslash escape if it is a backslash, backspace, form feed,
    /// line feed, carriage return or tab; any other character is handed, together
    /// with the byte sink, to `f`.
    #[doc(hidden)]
    pub fn write_stringlike_char_fallback<F>(&mut self, c: char, f: F) -> io::Result<()>
    where
        F: FnOnce(&mut W, char) -> io::Result<()>,
    {
        match c {
            '\\' => write!(self.w, "\\\\"),
            '\x08' => write!(self.w, "\\b"),
            '\x0c' => write!(self.w, "\\f"),
            '\x0a' => write!(self.w, "\\n"),
            '\x0d' => write!(self.w, "\\r"),
            '\x09' => write!(self.w, "\\t"),
            _ => f(&mut self.w, c),
        }
    }

    /// Writes `c`, escaping it as in
    /// [`write_stringlike_char_fallback`][Self::write_stringlike_char_fallback] and
    /// emitting every other character verbatim.
    #[doc(hidden)]
    pub fn write_stringlike_char(&mut self, c: char) -> io::Result<()> {
        self.write_stringlike_char_fallback(c, |w, c| write!(w, "{}", c))
    }

    /// Deepens the indent prefix by `indentation` spaces.
    #[doc(hidden)]
    pub fn add_indent(&mut self) {
        self.indent
            .extend(std::iter::repeat(' ').take(self.indentation));
    }

    /// Shrinks the indent prefix by `indentation` bytes, never removing the leading
    /// line break.
    ///
    /// `indentation` is a public field and may change between a call to
    /// [`add_indent`][Self::add_indent] and the matching call here, and the two
    /// calls may be unbalanced. The subtraction is therefore saturating and the
    /// result clamped to 1, instead of underflowing.
    #[doc(hidden)]
    pub fn del_indent(&mut self) {
        if self.indentation > 0 {
            // The prefix is ASCII-only ("\n" followed by spaces), so any length is a
            // valid char boundary for `truncate`.
            let shortened = self
                .indent
                .len()
                .saturating_sub(self.indentation)
                .max(1);
            self.indent.truncate(shortened);
        }
    }

    /// When pretty-printing, writes the indent prefix (a line break plus the current
    /// indentation); otherwise writes nothing.
    #[doc(hidden)]
    pub fn indent(&mut self) -> io::Result<()> {
        if self.indentation > 0 {
            write!(self.w, "{}", &self.indent)
        } else {
            Ok(())
        }
    }

    /// When pretty-printing, writes the indent prefix (a line break plus the current
    /// indentation); otherwise writes a single space.
    #[doc(hidden)]
    pub fn indent_sp(&mut self) -> io::Result<()> {
        if self.indentation > 0 {
            write!(self.w, "{}", &self.indent)
        } else {
            write!(self.w, " ")
        }
    }

    /// Borrow the underlying byte sink.
    pub fn borrow_write(&mut self) -> &mut W {
        &mut self.w
    }
}

impl<W: io::Write> CompoundWriter for TextWriter<W> {
    /// Emits whatever punctuation and whitespace belongs at the boundary `b`, i.e.
    /// between the item being closed (if any) and the item being opened (if any).
    ///
    /// Record labels/fields, dictionary key/value pairs and annotations have fixed
    /// separators. Every other boundary is governed by `comma_style` and by the
    /// pretty-printing indentation.
    #[inline]
    fn boundary(&mut self, b: &B::Type) -> io::Result<()> {
        let closing = b.closing.as_ref();
        let opening = b.opening.as_ref();

        // Arm order matters: the special cases come first and shadow the generic
        // open/between/close arms at the bottom.
        match (closing, opening) {
            // Records: nothing around the label, one space before each field.
            (None, Some(B::Item::RecordLabel))
            | (Some(B::Item::RecordLabel), None)
            | (Some(B::Item::RecordField), None) => Ok(()),
            (_, Some(B::Item::RecordField)) => write!(self.w, " "),

            // Dictionaries: key and value are joined by a colon.
            (Some(B::Item::DictionaryKey), Some(B::Item::DictionaryValue)) => {
                write!(self.w, ": ")
            }

            // Annotations: each is prefixed by `@`, then a space before the value.
            (None, Some(B::Item::Annotation)) => write!(self.w, "@"),
            (Some(_), Some(B::Item::Annotation)) => write!(self.w, " @"),
            (Some(B::Item::Annotation), Some(B::Item::AnnotatedValue)) => write!(self.w, " "),
            (Some(B::Item::AnnotatedValue), None) => Ok(()),

            // Empty compound: nothing to do.
            (None, None) => Ok(()),

            // First item of a compound: go one level deeper.
            (None, Some(_)) => {
                self.add_indent();
                self.indent()
            }

            // Between two items.
            (Some(_), Some(_)) => {
                let wants_comma = match self.comma_style {
                    CommaStyle::None => false,
                    CommaStyle::Separating | CommaStyle::Terminating => true,
                };
                if wants_comma {
                    write!(self.w, ",")?;
                }
                self.indent_sp()
            }

            // After the last item of a compound: come back up one level.
            (Some(_), None) => {
                let wants_comma = match self.comma_style {
                    CommaStyle::Terminating => true,
                    CommaStyle::Separating | CommaStyle::None => false,
                };
                if wants_comma {
                    write!(self.w, ",")?;
                }
                self.del_indent();
                self.indent()
            }
        }
    }
}

// Generates a writer method named `$method` that takes one value of type `$arg` and
// prints it to `self.w` using its `Display` implementation.
macro_rules! simple_writer_method {
    ($method:ident, $arg:ty) => {
        fn $method(&mut self, value: $arg) -> io::Result<()> {
            write!(self.w, "{}", value)
        }
    };
}

impl<W: io::Write> Writer for TextWriter<W> {
    type AnnWriter = Self;
    type RecWriter = Self;
    type SeqWriter = Self;
    type SetWriter = Self;
    type DictWriter = Self;
    type EmbeddedWriter = Self;

    /// Begins an annotated value; emits nothing and returns a suspended sub-writer.
    fn start_annotations(&mut self) -> io::Result<Self::AnnWriter> {
        let sub = self.suspend();
        Ok(sub)
    }

    /// Ends an annotated value by taking the sink back from `ann`.
    fn end_annotations(&mut self, ann: Self::AnnWriter) -> io::Result<()> {
        self.resume(ann);
        Ok(())
    }

    /// Writes `#t` for `true` and `#f` for `false`.
    fn write_bool(&mut self, v: bool) -> io::Result<()> {
        let token = if v { "#t" } else { "#f" };
        write!(self.w, "{}", token)
    }

    /// Writes a double. Finite values are printed in decimal via `dtoa`; NaN and
    /// the infinities are printed as `#xd"…"` holding the hex encoding of the
    /// big-endian bit pattern.
    fn write_f64(&mut self, v: f64) -> io::Result<()> {
        if v.is_finite() {
            dtoa::write(&mut *self.w, v)?;
            return Ok(());
        }
        let bits = f64::to_bits(v);
        let raw = u64::to_be_bytes(bits);
        write!(self.w, "#xd\"{}\"", HexFormatter::Packed.encode(&raw))
    }

    // Integers of every width are printed in plain decimal.
    simple_writer_method!(write_i8, i8);
    simple_writer_method!(write_u8, u8);
    simple_writer_method!(write_i16, i16);
    simple_writer_method!(write_u16, u16);
    simple_writer_method!(write_i32, i32);
    simple_writer_method!(write_u32, u32);
    simple_writer_method!(write_i64, i64);
    simple_writer_method!(write_u64, u64);
    simple_writer_method!(write_i128, i128);
    simple_writer_method!(write_u128, u128);
    simple_writer_method!(write_int, &BigInt);

    /// Writes `v` as a double-quoted string, escaping `"` and, when
    /// `escape_spaces` is set, spaces; remaining characters go through
    /// `write_stringlike_char`.
    fn write_string(&mut self, v: &str) -> io::Result<()> {
        write!(self.w, "\"")?;
        for ch in v.chars() {
            if ch == '"' {
                write!(self.w, "\\\"")?;
            } else if ch == ' ' && self.escape_spaces {
                write!(self.w, "\\u0020")?;
            } else {
                self.write_stringlike_char(ch)?;
            }
        }
        write!(self.w, "\"")
    }

    /// Writes `v` as `#[…]` containing its URL-safe, unpadded base64 encoding.
    fn write_bytes(&mut self, v: &[u8]) -> io::Result<()> {
        let encoded = base64::encode_config(v, base64::URL_SAFE_NO_PAD);
        write!(self.w, "#[{}]", encoded)
    }

    /// Writes the symbol `v`.
    ///
    /// It is written bare when it consists only of the characters accepted by
    /// `SYMBOL_OR_NUMBER` and does not look like a number. Otherwise it is wrapped
    /// in single quotes, escaping `'` and, when `escape_spaces` is set, spaces.
    fn write_symbol(&mut self, v: &str) -> io::Result<()> {
        lazy_static! {
            // FIXME: This regular expression is conservatively correct, but Anglo-chauvinistic.
            static ref SYMBOL_OR_NUMBER: regex::Regex =
                regex::Regex::new("^[-a-zA-Z0-9~!$%^&*?_=+/.|]+$").unwrap();
            static ref NUMBER: regex::Regex =
                regex::Regex::new(r"^([-+]?\d+)((\.\d+([eE][-+]?\d+)?)|([eE][-+]?\d+))?$").unwrap();
        }

        let can_be_bare = SYMBOL_OR_NUMBER.is_match(v) && !NUMBER.is_match(v);
        if can_be_bare {
            return write!(self.w, "{}", v);
        }

        write!(self.w, "'")?;
        for ch in v.chars() {
            if ch == '\'' {
                write!(self.w, "\\'")?;
            } else if ch == ' ' && self.escape_spaces {
                write!(self.w, "\\u0020")?;
            } else {
                self.write_stringlike_char(ch)?;
            }
        }
        write!(self.w, "'")
    }

    /// Opens a record with `<` and returns a suspended sub-writer for its contents.
    fn start_record(&mut self, _field_count: Option<usize>) -> io::Result<Self::RecWriter> {
        write!(self.w, "<")?;
        let sub = self.suspend();
        Ok(sub)
    }

    /// Takes the sink back from `rec` and closes the record with `>`.
    fn end_record(&mut self, rec: Self::RecWriter) -> io::Result<()> {
        self.resume(rec);
        write!(self.w, ">")
    }

    /// Opens a sequence with `[` and returns a suspended sub-writer for its contents.
    fn start_sequence(&mut self, _item_count: Option<usize>) -> io::Result<Self::SeqWriter> {
        write!(self.w, "[")?;
        let sub = self.suspend();
        Ok(sub)
    }

    /// Takes the sink back from `seq` and closes the sequence with `]`.
    fn end_sequence(&mut self, seq: Self::SeqWriter) -> io::Result<()> {
        self.resume(seq);
        write!(self.w, "]")
    }

    /// Opens a set with `#{` and returns a suspended sub-writer for its contents.
    fn start_set(&mut self, _item_count: Option<usize>) -> io::Result<Self::SetWriter> {
        write!(self.w, "#{{")?;
        let sub = self.suspend();
        Ok(sub)
    }

    /// Takes the sink back from `set` and closes the set with `}`.
    fn end_set(&mut self, set: Self::SetWriter) -> io::Result<()> {
        self.resume(set);
        write!(self.w, "}}")
    }

    /// Opens a dictionary with `{` and returns a suspended sub-writer for its contents.
    fn start_dictionary(&mut self, _entry_count: Option<usize>) -> io::Result<Self::DictWriter> {
        write!(self.w, "{{")?;
        let sub = self.suspend();
        Ok(sub)
    }

    /// Takes the sink back from `dict` and closes the dictionary with `}`.
    fn end_dictionary(&mut self, dict: Self::DictWriter) -> io::Result<()> {
        self.resume(dict);
        write!(self.w, "}}")
    }

    /// Writes the `#:` embedded-value prefix and returns a suspended sub-writer for
    /// the embedded value itself.
    fn start_embedded(&mut self) -> io::Result<Self::EmbeddedWriter> {
        write!(self.w, "#:")?;
        let sub = self.suspend();
        Ok(sub)
    }

    /// Takes the sink back from `ptr`; embedded values have no closing delimiter.
    fn end_embedded(&mut self, ptr: Self::EmbeddedWriter) -> io::Result<()> {
        self.resume(ptr);
        Ok(())
    }

    /// Flushes the underlying byte sink.
    fn flush(&mut self) -> io::Result<()> {
        self.w.flush()
    }
}