jaq_json/
write.rs

1//! Functions and macros for writing (parts of) values.
2//!
3//! We use macros so that we can create both
4//! formatters ([core::fmt::Formatter]) and
5//! writers ([std::io::Write]) from the same code.
6
7use crate::{Tag, Val};
8use core::fmt::{self, Formatter};
9use std::io::{self, Write};
10
/// Write a single byte `$c` to `$w`, escaping it where necessary.
///
/// - Backspace (0x08) and form feed (0x0C) are written as `\b` and `\f`.
/// - Tab, line feed, carriage return, backslash and double quote are written
///   as `\t`, `\n`, `\r`, `\\` and `\"`.
/// - Every other byte in `0x00..=0x1F` or `0x7F..=0xFF` is handed to the
///   fallback expression `$f`, which decides how to print it.
/// - All remaining bytes (printable ASCII) are written verbatim.
///
/// The macro evaluates to the result of the `write!` call that was performed
/// (or to the value of `$f`).
#[macro_export]
macro_rules! write_byte {
    ($w:ident, $c:expr, $f:expr) => {{
        match $c {
            // short escapes, spelled out one by one
            0x08 => write!($w, "\\b"),
            b'\t' => write!($w, "\\t"),
            b'\n' => write!($w, "\\n"),
            0x0c => write!($w, "\\f"),
            b'\r' => write!($w, "\\r"),
            b'"' => write!($w, "\\\""),
            b'\\' => write!($w, "\\\\"),
            // remaining control characters, DEL, and everything outside ASCII
            0x00..=0x1F | 0x7F..=0xFF => $f,
            // printable ASCII needs no escaping
            plain => write!($w, "{}", char::from(plain)),
        }
    }};
}
32
/// Write the bytes `$s` as a JSON string literal, surrounded by double quotes.
///
/// The input is cut after every byte that must be escaped
/// (`0x00..=0x1F`, `0x7F`, `"` and `\`).
/// The bytes in front of such a byte, as well as a trailing run that does
/// not end in one, are passed to `$f` as a byte slice (which may be empty).
/// The byte to escape itself goes through [`write_byte!`],
/// falling back to a `\uXXXX` escape where no shorter escape exists.
#[macro_export]
macro_rules! write_utf8 {
    ($w:ident, $s:ident, $f:expr) => {{
        write!($w, "\"")?;
        let needs_escape = |b: u8| matches!(b, b'"' | b'\\' | 0x7F | 0x00..=0x1F);
        for chunk in $s.split_inclusive(|b| needs_escape(*b)) {
            match chunk.split_last() {
                // the chunk ends with a byte to escape: emit prefix, then the escape
                Some((tail, head)) if needs_escape(*tail) => {
                    $f(head)?;
                    write_byte!($w, *tail, write!($w, "\\u{tail:04x}"))?
                }
                // final chunk that contains nothing to escape
                _ => $f(chunk)?,
            }
        }
        write!($w, "\"")
    }};
}
53
/// Write the byte string `$s` as `b"..."`, including the quotes.
///
/// Every byte is written via [`write_byte!`]:
/// printable ASCII is written as-is, a few characters get short escapes
/// (such as `\n` or `\"`), and every remaining control or non-ASCII byte
/// is written as `\xXX`.
#[macro_export]
macro_rules! write_bytes {
    ($w:ident, $s:ident) => {{
        write!($w, "b\"")?;
        for byte in $s.iter() {
            write_byte!($w, *byte, write!($w, "\\x{byte:02x}"))?;
        }
        write!($w, "\"")
    }};
}
66
/// Write all remaining items of the iterator `$iter`, separated by commas.
///
/// Each item is written by calling `$f` on it.
/// Any error of the first item is propagated with `?`;
/// the macro then evaluates to the result of writing the remaining items,
/// which stops at the first error.
macro_rules! write_seq {
    ($w:ident, $iter:ident, $f:expr) => {{
        // the first item is not preceded by a separator
        match $iter.next() {
            Some(head) => $f(head)?,
            None => (),
        }
        // every following item is preceded by a comma
        $iter.try_for_each(|item| {
            write!($w, ",")?;
            $f(item)
        })
    }};
}
78
/// Write the value `$v` to `$w` in a superset of JSON, using `$f` for sub-values.
///
/// `$f` is called for every array element and for every object key and value.
///
/// UTF-8 strings are written through `bstr`, so that invalid UTF-8 sequences
/// are replaced by the Unicode replacement character.
/// This makes the macro usable not only with writers, but also with
/// formatters, whose output has to be valid UTF-8.
/// The JSON/YAML writers usually override this behaviour
/// and emit invalid UTF-8 sequences unchanged.
// NOTE(review): this macro expands to `write_seq!`, which is not marked
// `#[macro_export]` in this file; confirm that every user of `write_val!`
// has `write_seq!` in scope.
#[macro_export]
macro_rules! write_val {
    ($w:ident, $v:ident, $f:expr) => {{
        use $crate::{bstr, Tag, Val};
        match $v {
            Val::Null => write!($w, "null"),
            Val::Bool(b) => write!($w, "{b}"),
            Val::Num(n) => write!($w, "{n}"),
            Val::Str(bytes, Tag::Bytes) => write_bytes!($w, bytes),
            Val::Str(text, Tag::Utf8) => write_utf8!($w, text, |chunk| write!($w, "{}", bstr(chunk))),
            Val::Arr(arr) => {
                write!($w, "[")?;
                let mut elems = arr.iter();
                write_seq!($w, elems, $f)?;
                write!($w, "]")
            }
            Val::Obj(obj) => {
                write!($w, "{{")?;
                let mut entries = obj.iter();
                write_seq!($w, entries, |(key, value)| {
                    use jaq_std::ValT;
                    $f(key)?;
                    // YAML reads {1:2} as {"1:2": null}, but {1: 2} as {1: 2}.
                    // To stay compatible with jq's compact output for
                    // ordinary string keys, a space is put between ':' and
                    // the value only if the key is NOT a UTF-8 string.
                    let pad = if key.is_utf8_str() { "" } else { " " };
                    write!($w, ":{}", pad)?;
                    $f(value)
                })?;
                write!($w, "}}")
            }
        }
    }};
}
122
/// Function that writes a `T` to an I/O writer.
type WriteFn<T> = fn(&mut dyn Write, &T) -> io::Result<()>;
/// Function that writes a `T` to a formatter.
type FormatFn<T> = fn(&mut Formatter, &T) -> fmt::Result;
125
126pub(crate) fn write_with(w: &mut dyn Write, v: &Val, f: WriteFn<Val>) -> io::Result<()> {
127    match v {
128        Val::Str(b, Tag::Bytes) => write_bytes!(w, b),
129        Val::Str(s, Tag::Utf8) => write_utf8!(w, s, |part| w.write_all(part)),
130        _ => write_val!(w, v, |v: &Val| f(w, v)),
131    }
132}
133
134/// Format a value as compact JSON, using a custom function to format child values.
135///
136/// This is useful to override how certain values are printed, e.g. for YAML.
137pub(crate) fn format_with(w: &mut Formatter, v: &Val, f: FormatFn<Val>) -> fmt::Result {
138    write_val!(w, v, |v: &Val| f(w, v))
139}
140
141/// Write a value as JSON.
142///
143/// Note that unlike jq, this may actually produce invalid JSON.
144/// In particular, this may yield:
145///
146/// - literals for special floating-point values (NaN, Infinity, -Infinity)
147/// - invalid UTF-8 characters
148/// - byte strings with `\xXX` sequences
149/// - objects with non-string keys
150///
151/// The key principles behind this behaviour are:
152///
153/// 1. Printing a value should always succeed.
154///    (Otherwise, there would exist values that we could not even inspect.)
155/// 2. Printing a value should yield valid JSON if and only if
156///    the value can be represented by an equivalent JSON value.
157///    (To give users a chance to find non-JSON values and to take appropriate action.)
158///
159/// jq and jaq agree on principle 1, but disagree on principle 2.
160/// In particular, this shows by the fact that `jq -n 'nan'` yields `null`.
161/// That means that jq maps values that cannot be represented by JSON
162/// to different values that can be represented by JSON.
163///
164/// In summary,
165/// jq may cause silent information loss, whereas
166/// jaq may yield invalid JSON values.
167/// Choose your poison.
168pub fn write(w: &mut dyn io::Write, v: &Val) -> io::Result<()> {
169    write_with(w, v, |w, v| write(w, v))
170}