json_comments/
lib.rs

//! `json_comments` is a library to strip out comments from JSON-like text. By processing text
//! through a [`StripComments`] adapter first, it is possible to use a standard JSON parser (such
//! as [serde_json](https://crates.io/crates/serde_json)) with quasi-json input that contains
//! comments.
5//!
6//! In fact, this code makes few assumptions about the input and could probably be used to strip
7//! comments out of other types of code as well, provided that strings use double quotes and
8//! backslashes are used for escapes in strings.
9//!
10//! The following types of comments are supported:
11//!   - C style block comments (`/* ... */`)
12//!   - C style line comments (`// ...`)
13//!   - Shell style line comments (`# ...`)
14//!
15//! ## Example using serde_json
16//!
17//! ```
18//! use serde_json::{Result, Value};
19//! use json_comments::StripComments;
20//!
21//! # fn main() -> Result<()> {
//! // Some JSON input data as a &str. Maybe this comes from the user.
23//! let data = r#"
24//!     {
25//!         "name": /* full */ "John Doe",
26//!         "age": 43,
27//!         "phones": [
28//!             "+44 1234567", // work phone
29//!             "+44 2345678"  // home phone
30//!         ]
31//!     }"#;
32//!
33//! // Strip the comments from the input (use `as_bytes()` to get a `Read`).
34//! let stripped = StripComments::new(data.as_bytes());
35//! // Parse the string of data into serde_json::Value.
36//! let v: Value = serde_json::from_reader(stripped)?;
37//!
38//! println!("Please call {} at the number {}", v["name"], v["phones"][0]);
39//!
40//! # Ok(())
41//! # }
42//! ```
43//!
44use std::io::{ErrorKind, Read, Result};
45
/// Position of the comment-stripping state machine within the input stream.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum State {
    /// Outside of any string or comment.
    Top,
    /// Inside a double-quoted string.
    InString,
    /// Directly after a backslash inside a string; the next byte is taken literally.
    StringEscape,
    /// A `/` was seen at the top level; the next byte decides which kind of comment it is.
    InComment,
    /// Inside a `/* ... */` block comment.
    InBlockComment,
    /// Inside a block comment, directly after a `*` that may start the closing `*/`.
    MaybeCommentEnd,
    /// Inside a line comment, which runs up to the next newline.
    InLineComment,
}
56
57use State::*;
58
59/// A [`Read`] that transforms another [`Read`] so that it changes all comments to spaces so that a downstream json parser
60/// (such as json-serde) doesn't choke on them.
61///
62/// The supported comments are:
63///   - C style block comments (`/* ... */`)
64///   - C style line comments (`// ...`)
65///   - Shell style line comments (`# ...`)
66///
67/// ## Example
68/// ```
69/// use json_comments::StripComments;
70/// use std::io::Read;
71///
72/// let input = r#"{
73/// // c line comment
74/// "a": "comment in string /* a */",
75/// ## shell line comment
76/// } /** end */"#;
77///
78/// let mut stripped = String::new();
79/// StripComments::new(input.as_bytes()).read_to_string(&mut stripped).unwrap();
80///
81/// assert_eq!(stripped, "{
82///                  \n\"a\": \"comment in string /* a */\",
83///                     \n}           ");
84///
85/// ```
86///
87pub struct StripComments<T: Read> {
88    inner: T,
89    state: State,
90    settings: CommentSettings,
91}
92
93impl<T> StripComments<T>
94where
95    T: Read,
96{
97    pub fn new(input: T) -> Self {
98        Self {
99            inner: input,
100            state: Top,
101            settings: CommentSettings::default(),
102        }
103    }
104
105    /// Create a new `StripComments` with settings which may be different from the default.
106    ///
107    /// This is useful if you wish to disable allowing certain kinds of comments.
108    #[inline]
109    pub fn with_settings(settings: CommentSettings, input: T) -> Self {
110        Self {
111            inner: input,
112            state: Top,
113            settings,
114        }
115    }
116}
117
/// Return early from the enclosing function with an `InvalidData` I/O error.
///
/// The expansion is a `return` expression, so it can be used both as a statement and in
/// expression position (for example as a `match` arm).
macro_rules! invalid_data {
    () => {
        return Err(::std::io::Error::from(::std::io::ErrorKind::InvalidData))
    };
}
123
124impl<T> Read for StripComments<T>
125where
126    T: Read,
127{
128    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
129        let count = self.inner.read(buf)?;
130        if count > 0 {
131            for c in buf[..count].iter_mut() {
132                self.state = match self.state {
133                    Top => top(c, &self.settings),
134                    InString => in_string(*c),
135                    StringEscape => InString,
136                    InComment => in_comment(c, &self.settings)?,
137                    InBlockComment => in_block_comment(c),
138                    MaybeCommentEnd => maybe_comment_end(c),
139                    InLineComment => in_line_comment(c),
140                }
141            }
142        } else if self.state != Top && self.state != InLineComment {
143            invalid_data!();
144        }
145        Ok(count)
146    }
147}
148
/// Configuration for [`StripComments`]: which comment styles are recognised.
///
/// By default every comment style is enabled.
#[derive(Clone, Copy, Debug)]
pub struct CommentSettings {
    /// Whether C-style block comments (`/* ... */`) are allowed.
    block_comments: bool,
    /// Whether C-style line comments (`// ...`) are allowed.
    slash_line_comments: bool,
    /// Whether shell-style line comments (`# ...`) are allowed.
    hash_line_comments: bool,
}
161
162impl Default for CommentSettings {
163    fn default() -> Self {
164        Self::all()
165    }
166}
167
168impl CommentSettings {
169    /// Enable all comment Styles
170    pub const fn all() -> Self {
171        Self {
172            block_comments: true,
173            slash_line_comments: true,
174            hash_line_comments: true,
175        }
176    }
177    /// Only allow line comments starting with `#`
178    pub const fn hash_only() -> Self {
179        Self {
180            hash_line_comments: true,
181            block_comments: false,
182            slash_line_comments: false,
183        }
184    }
185    /// Only allow "c-style" comments.
186    ///
187    /// Specifically, line comments beginning with `//` and
188    /// block comment like `/* ... */`.
189    pub const fn c_style() -> Self {
190        Self {
191            block_comments: true,
192            slash_line_comments: true,
193            hash_line_comments: false,
194        }
195    }
196
197    /// Create a new `StripComments` for `input`, using these settings.
198    ///
199    /// Transform `input` into a [`Read`] that strips out comments.
200    /// The types of comments to support are determined by the configuration of
201    /// `self`.
202    ///
203    /// ## Examples
204    ///
205    /// ```
206    /// use json_comments::CommentSettings;
207    /// use std::io::Read;
208    ///
209    /// let input = r#"{
210    /// // c line comment
211    /// "a": "b"
212    /// /** multi line
213    /// comment
214    /// */ }"#;
215    ///
216    /// let mut stripped = String::new();
217    /// CommentSettings::c_style().strip_comments(input.as_bytes()).read_to_string(&mut stripped).unwrap();
218    ///
219    /// assert_eq!(stripped, "{
220    ///                  \n\"a\": \"b\"
221    ///                           }");
222    /// ```
223    ///
224    /// ```
225    /// use json_comments::CommentSettings;
226    /// use std::io::Read;
227    ///
228    /// let input = r#"{
229    /// ## shell line comment
230    /// "a": "b"
231    /// }"#;
232    ///
233    /// let mut stripped = String::new();
234    /// CommentSettings::hash_only().strip_comments(input.as_bytes()).read_to_string(&mut stripped).unwrap();
235    ///
236    /// assert_eq!(stripped, "{
237    ///                     \n\"a\": \"b\"\n}");
238    /// ```
239    #[inline]
240    pub fn strip_comments<I: Read>(self, input: I) -> StripComments<I> {
241        StripComments::with_settings(self, input)
242    }
243}
244
245fn top(c: &mut u8, settings: &CommentSettings) -> State {
246    match *c {
247        b'"' => InString,
248        b'/' => {
249            *c = b' ';
250            InComment
251        }
252        b'#' if settings.hash_line_comments => {
253            *c = b' ';
254            InLineComment
255        }
256        _ => Top,
257    }
258}
259
260fn in_string(c: u8) -> State {
261    match c {
262        b'"' => Top,
263        b'\\' => StringEscape,
264        _ => InString,
265    }
266}
267
268fn in_comment(c: &mut u8, settings: &CommentSettings) -> Result<State> {
269    let new_state = match c {
270        b'*' if settings.block_comments => InBlockComment,
271        b'/' if settings.slash_line_comments => InLineComment,
272        _ => invalid_data!(),
273    };
274    *c = b' ';
275    Ok(new_state)
276}
277
278fn in_block_comment(c: &mut u8) -> State {
279    let old = *c;
280    *c = b' ';
281    if old == b'*' {
282        MaybeCommentEnd
283    } else {
284        InBlockComment
285    }
286}
287
288fn maybe_comment_end(c: &mut u8) -> State {
289    let old = *c;
290    *c = b' ';
291    match old {
292        b'/' => Top,
293        b'*' => MaybeCommentEnd,
294        _ => InBlockComment,
295    }
296}
297
298fn in_line_comment(c: &mut u8) -> State {
299    if *c == b'\n' {
300        Top
301    } else {
302        *c = b' ';
303        InLineComment
304    }
305}
306
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{ErrorKind, Read};

    /// Strip `input` with the default settings, checking that the byte count is preserved.
    fn strip_string(input: &str) -> String {
        let mut reader = StripComments::new(input.as_bytes());
        let mut out = String::new();
        let count = reader.read_to_string(&mut out).unwrap();
        assert_eq!(count, input.len());
        out
    }

    /// Drain `reader` and assert that doing so fails with `ErrorKind::InvalidData`.
    fn assert_invalid_data<R: Read>(mut reader: R) {
        let mut stripped = String::new();
        let err = reader.read_to_string(&mut stripped).unwrap_err();
        assert_eq!(err.kind(), ErrorKind::InvalidData);
    }

    #[test]
    fn block_comments() {
        let json = r#"{/* Comment */"hi": /** abc */ "bye"}"#;
        assert_eq!(strip_string(json), r#"{             "hi":            "bye"}"#);
    }

    #[test]
    fn block_comments_with_possible_end() {
        let json = r#"{/* Comment*PossibleEnd */"hi": /** abc */ "bye"}"#;
        assert_eq!(
            strip_string(json),
            r#"{                         "hi":            "bye"}"#
        );
    }

    // See https://github.com/tmccombs/json-comments-rs/issues/12
    // Make sure we can parse a block comment that ends with more than one "*"
    #[test]
    fn doc_comment() {
        let json = r##"/** C **/ { "foo": 123 }"##;
        assert_eq!(strip_string(json), r##"          { "foo": 123 }"##);
    }

    #[test]
    fn line_comments() {
        let json = r#"{
            // line comment
            "a": 4,
            # another
        }"#;

        let expected = "{
                           \n            \"a\": 4,
                     \n        }";

        assert_eq!(strip_string(json), expected);
    }

    #[test]
    fn incomplete_string() {
        let json = r#""foo"#;
        assert_invalid_data(StripComments::new(json.as_bytes()));
    }

    #[test]
    fn incomplete_comment() {
        let json = r#"/* foo "#;
        assert_invalid_data(StripComments::new(json.as_bytes()));
    }

    #[test]
    fn incomplete_comment2() {
        let json = r#"/* foo *"#;
        assert_invalid_data(StripComments::new(json.as_bytes()));
    }

    // With hash comments disabled, `#` is passed through untouched.
    #[test]
    fn no_hash_comments() {
        let json = r#"# bad comment
        {"a": "b"}"#;
        let mut stripped = String::new();
        CommentSettings::c_style()
            .strip_comments(json.as_bytes())
            .read_to_string(&mut stripped)
            .unwrap();
        assert_eq!(stripped, json);
    }

    #[test]
    fn no_slash_line_comments() {
        let json = r#"// bad comment
        {"a": "b"}"#;
        assert_invalid_data(CommentSettings::hash_only().strip_comments(json.as_bytes()));
    }

    #[test]
    fn no_block_comments() {
        let json = r#"/* bad comment */ {"a": "b"}"#;
        assert_invalid_data(CommentSettings::hash_only().strip_comments(json.as_bytes()));
    }
}
429}