yara_x_proto_json/
lib.rs

1/*! Serializes Protocol Buffer (protobuf) messages into JSON format.
2
3This crate provides functionality to serialize arbitrary protobuf messages
4into a structured JSON representation. Special handling is applied to certain
5protobuf field types that are not natively representable in JSON—most notably,
6`bytes` fields.
7
8Since raw byte sequences may contain non-UTF-8 data, they cannot be directly
9encoded as JSON strings. Instead, they are serialized as an object containing
10the base64-encoded value along with an encoding identifier. For example:
11
12```json
13{
14  "my_bytes_field": {
15    "encoding": "base64",
16    "value": "dGhpcyBpcyB0aGUgb3JpZ2luYWwgdmFsdWU="
17  }
18}
19```
20*/
21
22use std::borrow::Cow;
23use std::cmp::Ordering;
24use std::io::{Error, Write};
25
26use base64::prelude::BASE64_STANDARD;
27use base64::Engine;
28use itertools::Itertools;
29use protobuf::reflect::ReflectFieldRef::{Map, Optional, Repeated};
30use protobuf::reflect::{FieldDescriptor, MessageRef, ReflectValueRef};
31use protobuf::MessageDyn;
32use yansi::{Color, Paint, Style};
33
34use yara_x_proto::{get_field_format, FieldFormat};
35
36#[cfg(test)]
37mod tests;
38
39include!(concat!(env!("OUT_DIR"), "/protos/mod.rs"));
40
/// Number of spaces added to the current indentation each time the
/// serializer enters a nested message, repeated field or map, and removed
/// again when it leaves it.
const INDENTATION: u16 = 4;
42
/// Styles applied to the different parts of the JSON output.
///
/// The derived `Default` value is the one used when colors are disabled.
#[derive(Default)]
struct Colors {
    /// Style used for string values, map keys and base64-encoded bytes.
    string: Style,
    /// Style used for field names.
    field_name: Style,
}
49
/// Serializes a protobuf to JSON format.
///
/// Takes a protobuf message and produces a JSON representation of it.
pub struct Serializer<W: Write> {
    /// Number of spaces written after every line break. It grows and
    /// shrinks by `INDENTATION` as nested structures are entered and left.
    indent: u16,
    /// Destination that receives the serialized JSON.
    output: W,
    /// Styles applied to strings and field names.
    colors: Colors,
}
58
59impl<W: Write> Serializer<W> {
60    /// Creates a new YAML serializer that writes its output to `w`.
61    pub fn new(w: W) -> Self {
62        Self { output: w, indent: 0, colors: Colors::default() }
63    }
64
65    /// Specifies whether the serializer should colorize the output.
66    ///
67    /// If true, the output contain ANSI escape sequences that make it
68    /// look nicer on compatible consoles. The default setting is `false`.
69    pub fn with_colors(&mut self, yes: bool) -> &mut Self {
70        self.colors = if yes {
71            Colors {
72                string: Color::Green.foreground(),
73                field_name: Color::Yellow.foreground(),
74            }
75        } else {
76            Colors::default()
77        };
78        self
79    }
80
81    /// Serializes the given protobuf message.
82    pub fn serialize(&mut self, msg: &dyn MessageDyn) -> Result<(), Error> {
83        self.write_msg(&MessageRef::new(msg))
84    }
85}
86
87impl<W: Write> Serializer<W> {
88    fn print_integer_value<T: Into<i64> + ToString + Copy>(
89        &mut self,
90        value: T,
91        format: FieldFormat,
92    ) -> Result<(), std::io::Error> {
93        if matches!(format, FieldFormat::Timestamp) {
94            write!(
95                self.output,
96                "{{ \"encoding\": \"timestamp\", \"value\": {} }}",
97                value.to_string()
98            )
99        } else {
100            write!(self.output, "{}", value.to_string())
101        }
102    }
103
104    fn escape(s: &str) -> Cow<'_, str> {
105        if s.chars()
106            .any(|c| matches!(c, '\n' | '\r' | '\t' | '\'' | '"' | '\\'))
107        {
108            let mut result = String::with_capacity(s.len());
109            for c in s.chars() {
110                match c {
111                    '\n' => result.push_str(r"\n"),
112                    '\r' => result.push_str(r"\r"),
113                    '\t' => result.push_str(r"\t"),
114                    '"' => result.push_str("\\\""),
115                    '\\' => result.push_str(r"\\"),
116                    _ => result.push(c),
117                }
118            }
119            Cow::Owned(result)
120        } else {
121            Cow::Borrowed(s)
122        }
123    }
124
125    fn write_field_name(&mut self, name: &str) -> Result<(), Error> {
126        write!(self.output, "\"{}\": ", name.paint(self.colors.field_name))
127    }
128
129    fn write_msg(&mut self, msg: &MessageRef) -> Result<(), Error> {
130        let descriptor = msg.descriptor_dyn();
131
132        // Iterator that returns only the non-empty fields in the message.
133        let mut non_empty_fields = descriptor
134            .fields()
135            .filter(|field| match field.get_reflect(&**msg) {
136                Optional(optional) => optional.value().is_some(),
137                Repeated(repeated) => !repeated.is_empty(),
138                Map(map) => !map.is_empty(),
139            })
140            .peekable();
141
142        write!(self.output, "{{")?;
143        self.indent += INDENTATION;
144        self.newline()?;
145
146        while let Some(field) = non_empty_fields.next() {
147            match field.get_reflect(&**msg) {
148                Optional(optional) => {
149                    let value = optional.value().unwrap();
150                    self.write_field_name(field.name())?;
151                    self.write_value(&field, &value)?;
152                }
153                Repeated(repeated) => {
154                    self.write_field_name(field.name())?;
155                    write!(self.output, "[")?;
156                    self.indent += INDENTATION;
157                    self.newline()?;
158                    let mut items = repeated.into_iter().peekable();
159                    while let Some(value) = items.next() {
160                        self.write_value(&field, &value)?;
161                        if items.peek().is_some() {
162                            write!(self.output, ",")?;
163                            self.newline()?;
164                        }
165                    }
166                    self.indent -= INDENTATION;
167                    self.newline()?;
168                    write!(self.output, "]")?;
169                }
170                Map(map) => {
171                    self.write_field_name(field.name())?;
172                    write!(self.output, "{{")?;
173                    self.indent += INDENTATION;
174                    self.newline()?;
175
176                    // Iteration order is not stable (i.e: the order in which
177                    // items are returned can vary from one execution to the
178                    // other), because the underlying data structure is a
179                    // HashMap. For this reason items are wrapped in a KV
180                    // struct (which implement the Ord trait) and sorted.
181                    // Key-value pairs are sorted by key.
182                    let mut items = map
183                        .into_iter()
184                        .map(|(key, value)| KV { key, value })
185                        .sorted()
186                        .peekable();
187
188                    while let Some(item) = items.next() {
189                        let key = item.key.to_string();
190                        write!(
191                            self.output,
192                            "\"{}\": ",
193                            Self::escape(&key).paint(self.colors.string)
194                        )?;
195                        self.write_value(&field, &item.value)?;
196                        if items.peek().is_some() {
197                            write!(self.output, ",")?;
198                            self.newline()?;
199                        } else {
200                            self.indent -= INDENTATION;
201                            self.newline()?;
202                        }
203                    }
204                    write!(self.output, "}}")?;
205                }
206            }
207
208            if non_empty_fields.peek().is_some() {
209                write!(self.output, ",")?;
210                self.newline()?;
211            }
212        }
213
214        self.indent -= INDENTATION;
215        self.newline()?;
216        write!(self.output, "}}")?;
217
218        Ok(())
219    }
220
221    fn write_value(
222        &mut self,
223        field: &FieldDescriptor,
224        value: &ReflectValueRef,
225    ) -> Result<(), Error> {
226        match value {
227            ReflectValueRef::U32(v) => {
228                self.print_integer_value(*v, get_field_format(field))?
229            }
230            ReflectValueRef::U64(v) => {
231                self.print_integer_value(*v as i64, get_field_format(field))?
232            }
233            ReflectValueRef::I32(v) => {
234                self.print_integer_value(*v, get_field_format(field))?
235            }
236            ReflectValueRef::I64(v) => {
237                self.print_integer_value(*v, get_field_format(field))?
238            }
239            ReflectValueRef::F32(v) => write!(self.output, "{}", v)?,
240            ReflectValueRef::F64(v) => write!(self.output, "{}", v)?,
241            ReflectValueRef::Bool(v) => write!(self.output, "{}", v)?,
242            ReflectValueRef::String(v) => {
243                write!(
244                    self.output,
245                    "\"{}\"",
246                    Self::escape(v).paint(self.colors.string)
247                )?;
248            }
249            ReflectValueRef::Bytes(v) => write!(
250                self.output,
251                "{{ \"encoding\": \"base64\", \"value\": \"{}\"}}",
252                BASE64_STANDARD.encode(v).paint(self.colors.string)
253            )?,
254            ReflectValueRef::Enum(d, v) => match d.value_by_number(*v) {
255                Some(e) => write!(self.output, "\"{}\"", e.name())?,
256                None => write!(self.output, "{}", v)?,
257            },
258            ReflectValueRef::Message(msg) => self.write_msg(msg)?,
259        }
260        Ok(())
261    }
262
263    fn newline(&mut self) -> Result<(), Error> {
264        writeln!(self.output)?;
265        for _ in 0..self.indent {
266            write!(self.output, " ")?;
267        }
268        Ok(())
269    }
270}
271
/// Helper type that allows to sort the entries in protobuf map.
///
/// Entries are ordered by `key`; `value` is not inspected by the `Ord`
/// implementation.
struct KV<'a> {
    /// Key of the map entry, the one used for ordering.
    key: ReflectValueRef<'a>,
    /// Value associated to the key.
    value: ReflectValueRef<'a>,
}
277
278impl PartialOrd for KV<'_> {
279    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
280        Some(self.cmp(other))
281    }
282}
283
284impl Ord for KV<'_> {
285    fn cmp(&self, other: &Self) -> Ordering {
286        match self.key {
287            ReflectValueRef::U32(v) => {
288                v.cmp(&other.key.to_u32().unwrap_or_default())
289            }
290            ReflectValueRef::U64(v) => {
291                v.cmp(&other.key.to_u64().unwrap_or_default())
292            }
293            ReflectValueRef::I32(v) => {
294                v.cmp(&other.key.to_i32().unwrap_or_default())
295            }
296            ReflectValueRef::I64(v) => {
297                v.cmp(&other.key.to_i64().unwrap_or_default())
298            }
299            ReflectValueRef::Bool(v) => {
300                v.cmp(&other.key.to_bool().unwrap_or_default())
301            }
302            ReflectValueRef::String(v) => {
303                v.cmp(other.key.to_str().unwrap_or_default())
304            }
305            _ => {
306                // Protobuf doesn't support map keys of any other type
307                // except the ones listed above.
308                panic!("unsupported type in map key")
309            }
310        }
311    }
312}
313
314impl PartialEq for KV<'_> {
315    fn eq(&self, other: &Self) -> bool {
316        self.key.to_str().eq(&other.key.to_str())
317    }
318}
319
320impl Eq for KV<'_> {}