protobuf_to_json/
parser.rs

1//! Protobuf parser.
2
3use std::ops::Range;
4
5use base64::prelude::*;
6use serde_json::{Map, Value, json};
7
8use crate::{Field, FieldValue, Message, message::WireType, varint::decode_var};
9
/// Field numbers in the half-open range `19000..20000` (19000 through 19999).
///
/// The parser treats a nested payload that is valid UTF-8 and contains a field
/// number from this range as text rather than as a message.
const RESERVED_FIELD_NUMBER: Range<u64> = 19_000..20_000;
11
12/// A protobuf parser that converts protobuf messages to JSON.
13#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
14pub struct Parser {
15    /// How to encode bytes fields when converting to JSON.
16    pub bytes_encoding: BytesEncoding,
17}
18
19impl Parser {
20    /// Create a new parser.
21    pub fn new() -> Self {
22        Self::default()
23    }
24
25    /// Create a new parser with the given bytes encoding method.
26    pub fn with_bytes_encoding(bytes_encoding: BytesEncoding) -> Self {
27        Self { bytes_encoding }
28    }
29
30    /// Parse a protobuf message from the given byte slice and convert it to JSON.
31    pub fn parse(&self, data: &[u8]) -> Option<Value> {
32        self.parse_to_json(data, true)
33    }
34
35    /// Recursively parse a protobuf message and convert it to JSON.
36    fn parse_to_json(&self, data: &[u8], first_layer: bool) -> Option<Value> {
37        if data.is_empty() {
38            return None;
39        }
40
41        // Check if the data is valid UTF-8 and not control characters
42        let utf8_str = simdutf8::basic::from_utf8(data);
43        if !first_layer && utf8_str.is_ok_and(|s| s.chars().all(|c| !c.is_control())) {
44            return None;
45        }
46
47        let Message { fields, garbage } = self.parse_once(data);
48        if fields.is_empty() {
49            return None;
50        }
51        // If not the first layer, and the data is valid UTF-8 and contains garbage or reserved fields, return None
52        if !first_layer
53            && utf8_str.is_ok()
54            && (garbage.is_some()
55                || fields
56                    .iter()
57                    .any(|f| RESERVED_FIELD_NUMBER.contains(&f.number)))
58        {
59            return None;
60        }
61
62        let mut map = Map::new();
63        for field in fields {
64            let key = field.number.to_string();
65            let value = match field.value {
66                FieldValue::Varint(v) => Value::Number((v as usize).into()),
67                FieldValue::Fixed64(v) => Value::Number(v.into()),
68                FieldValue::Fixed32(v) => Value::Number(v.into()),
69                FieldValue::LengthDelimited(bytes) => {
70                    if let Some(nested) = self.parse_to_json(bytes, false) {
71                        nested
72                    } else {
73                        match self.bytes_encoding {
74                            BytesEncoding::Auto => {
75                                if let Ok(s) = std::str::from_utf8(bytes) {
76                                    Value::String(s.to_string())
77                                } else {
78                                    Value::String(BASE64_STANDARD.encode(bytes))
79                                }
80                            }
81                            BytesEncoding::Base64 => Value::String(BASE64_STANDARD.encode(bytes)),
82                            BytesEncoding::ByteArray => {
83                                json!(bytes)
84                            }
85                            #[cfg(feature = "stfu8")]
86                            BytesEncoding::Stfu8 => Value::String(stfu8::encode_u8(bytes)),
87                            BytesEncoding::StringLossy => {
88                                let s = String::from_utf8_lossy(bytes);
89                                Value::String(s.to_string())
90                            }
91                        }
92                    }
93                }
94                FieldValue::Invalid(_, _) | FieldValue::Incomplete(_, _) => match first_layer {
95                    true => break,
96                    false => return None,
97                },
98            };
99
100            if let Some(existing) = map.get_mut(&key) {
101                if let Value::Array(arr) = existing {
102                    arr.push(value);
103                } else {
104                    let old_value = existing.clone();
105                    *existing = Value::Array(vec![old_value, value]);
106                }
107            } else {
108                map.insert(key, value);
109            }
110        }
111
112        Some(Value::Object(map))
113    }
114
115    /// Parse a protobuf message from the given byte slice without recursion.
116    pub fn parse_once<'a>(&self, mut data: &'a [u8]) -> Message<'a> {
117        let mut msg = Message {
118            fields: vec![],
119            garbage: None,
120        };
121
122        let data = &mut data;
123
124        loop {
125            if data.is_empty() {
126                break;
127            }
128
129            let tag = match decode_var(data) {
130                Ok(tag) => tag,
131                Err(_) => {
132                    msg.garbage = Some(data);
133                    break;
134                }
135            };
136
137            let number = tag >> 3;
138            let wire_type = WireType::from((tag & 0x07) as u8);
139
140            let value = FieldValue::decode(data, wire_type);
141            msg.fields.push(Field { number, value });
142        }
143
144        msg
145    }
146}
147
/// Strategy for rendering bytes fields in the JSON output.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub enum BytesEncoding {
    /// Emit a plain string when the bytes are valid UTF-8, and a base64 string
    /// otherwise. This is the default.
    #[default]
    Auto,

    /// Always emit a base64 string.
    Base64,

    /// Emit a JSON array containing one number per byte.
    ByteArray,

    /// Emit a string encoded with [stfu8](https://crates.io/crates/stfu8).
    #[cfg(feature = "stfu8")]
    Stfu8,

    /// Emit a string obtained by lossy UTF-8 conversion of the bytes.
    StringLossy,
}
169
#[cfg(test)]
mod tests {
    use hex_literal::hex;

    use super::*;

    /// Parses `data` with `parser` and compares the resulting JSON with `expected`.
    fn assert_parses_to(parser: &Parser, data: &[u8], expected: Value) {
        let actual = parser.parse(data).unwrap();
        assert_eq!(actual, expected);
    }

    /// Scalars, strings and one nested message with the default encoding.
    #[test]
    fn test_parse_1() {
        let input = hex!("0d1c0000001203596f751a024d65202b2a0a0a066162633132331200");
        assert_parses_to(
            &Parser::new(),
            &input,
            json!({
                "1": 28,
                "2": "You",
                "3": "Me",
                "4": 43,
                "5": {
                    "1": "abc123",
                    "2": ""
                }
            }),
        );
    }

    /// Same message as `test_parse_1` followed by a fixed64 and a varint field.
    #[test]
    fn test_parse_2() {
        let input =
            hex!("0d1c0000001203596f751a024d65202b2a0a0a06616263313233120031ba32a96cc10200003801");
        assert_parses_to(
            &Parser::new(),
            &input,
            json!({"1":28,"2":"You","3":"Me","4":43,"5":{"1":"abc123","2":""},"6":3029774971578u64,"7":1}),
        );
    }

    /// Larger message with multi-byte UTF-8 text and a nested message.
    #[test]
    fn test_parse_3() {
        let input = hex!(
            "0a0a6173636f6e2d66756c6c120a6173636f6e2d66756c6c1a1b323032352d30392d30325430393a33373a32362e3033393032385a2203302e312a0474657374421b323032352d30392d30325430393a33373a32362e3033393032385a480068007205302e312e308a016e46756c6c204173636f6e20696d706c656d656e746174696f6e202868617368e280913235362c2041454144e280913132382077697468206e6f6e6365206d61736b696e67202620746167207472756e636174696f6e2c20584f46e280913132382c2043584f46e28091313238292e92012368747470733a2f2f6769746875622e636f6d2f6a6a6b756d2f6173636f6e2d66756c6c9a011a68747470733a2f2f646f63732e72732f6173636f6e2d66756c6ca2012368747470733a2f2f6769746875622e636f6d2f6a6a6b756d2f6173636f6e2d66756c6caa014612222f6170692f76312f6372617465732f6173636f6e2d66756c6c2f76657273696f6e731a202f6170692f76312f6372617465732f6173636f6e2d66756c6c2f6f776e657273"
        );
        assert_parses_to(
            &Parser::new(),
            &input,
            json!({
              "1": "ascon-full",
              "13": 0,
              "14": "0.1.0",
              "17": "Full Ascon implementation (hash‑256, AEAD‑128 with nonce masking & tag truncation, XOF‑128, CXOF‑128).",
              "18": "https://github.com/jjkum/ascon-full",
              "19": "https://docs.rs/ascon-full",
              "2": "ascon-full",
              "20": "https://github.com/jjkum/ascon-full",
              "21": {
                "2": "/api/v1/crates/ascon-full/versions",
                "3": "/api/v1/crates/ascon-full/owners"
              },
              "3": "2025-09-02T09:37:26.039028Z",
              "4": "0.1",
              "5": "test",
              "8": "2025-09-02T09:37:26.039028Z",
              "9": 0
            }),
        );
    }

    /// `ByteArray` encoding renders a bytes field as an array of numbers.
    #[test]
    fn test_parse_encoding_bytearray() {
        let input = hex!("4a050001020304");
        assert_parses_to(
            &Parser::with_bytes_encoding(BytesEncoding::ByteArray),
            &input,
            json!({"9":[0,1,2,3,4]}),
        );
    }

    /// The default encoding renders the same bytes as a string, because they
    /// are valid UTF-8.
    #[test]
    fn test_parse_num_array() {
        let input = hex!("4a050001020304");
        assert_parses_to(
            &Parser::new(),
            &input,
            json!({"9":"\u{0000}\u{0001}\u{0002}\u{0003}\u{0004}"}),
        );
    }
}
250}