json_strip_comments/
lib.rs

1//! Replace json comments and trailing commas in place.
2//!
3//! A fork of a fork:
4//!
5//! * <https://github.com/tmccombs/json-comments-rs>
6//! * <https://github.com/parcel-bundler/parcel/pull/9032>
7//!
8//! `json-strip-comments` is a library to strip out comments from JSON. By processing text
9//! through a [`StripComments`] adapter first, it is possible to use a standard JSON parser (such
//! as [serde_json](https://crates.io/crates/serde_json)) with quasi-json input that contains
11//! comments.
12//!
13//! In fact, this code makes few assumptions about the input and could probably be used to strip
14//! comments out of other types of code as well, provided that strings use double quotes and
15//! backslashes are used for escapes in strings.
16//!
17//! The following types of comments are supported:
18//!   - C style block comments (`/* ... */`)
19//!   - C style line comments (`// ...`)
20//!   - Shell style line comments (`# ...`)
21//!
22//! ## Example
23//!
24//! ```rust
25#![doc = include_str!("../examples/example.rs")]
26//! ```
27
28use std::io::{ErrorKind, Read, Result};
29
/// Position of the comment scanner relative to strings and comments.
///
/// The scanner is a small state machine that advances over the input bytes; this is its
/// complete state, so it can be saved between chunks of input.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// Outside of any string or comment.
    Top,
    /// Inside a double-quoted string.
    InString,
    /// Directly after a backslash inside a string; the next byte is skipped.
    StringEscape,
    /// Directly after a `/` seen outside a string; the next byte selects the comment kind.
    InComment,
    /// Inside a `/* ... */` block comment.
    InBlockComment,
    /// Inside a block comment, directly after a `*` that may begin the closing `*/`.
    MaybeCommentEnd,
    /// Inside a `//` or `#` comment that runs to the end of the line.
    InLineComment,
}
40
41use State::{
42    InBlockComment, InComment, InLineComment, InString, MaybeCommentEnd, StringEscape, Top,
43};
44
45/// A [`Read`] that transforms another [`Read`] so that it changes all comments to spaces so that a downstream json parser
46/// (such as json-serde) doesn't choke on them.
47///
48/// The supported comments are:
49///   - C style block comments (`/* ... */`)
50///   - C style line comments (`// ...`)
51///   - Shell style line comments (`# ...`)
52///
53/// ## Example
54/// ```
55/// use json_strip_comments::StripComments;
56/// use std::io::Read;
57///
58/// let input = r#"{
59/// // c line comment
60/// "a": "comment in string /* a */",
61/// ## shell line comment
62/// } /** end */"#;
63///
64/// let mut stripped = String::new();
65/// StripComments::new(input.as_bytes()).read_to_string(&mut stripped).unwrap();
66///
67/// assert_eq!(stripped, "{
68///                  \n\"a\": \"comment in string /* a */\",
69///                     \n}           ");
70///
71/// ```
72///
73pub struct StripComments<T: Read> {
74    inner: T,
75    state: State,
76    settings: CommentSettings,
77}
78
79impl<T> StripComments<T>
80where
81    T: Read,
82{
83    pub fn new(input: T) -> Self {
84        Self {
85            inner: input,
86            state: Top,
87            settings: CommentSettings::default(),
88        }
89    }
90
91    /// Create a new `StripComments` with settings which may be different from the default.
92    ///
93    /// This is useful if you wish to disable allowing certain kinds of comments.
94    #[inline]
95    pub fn with_settings(settings: CommentSettings, input: T) -> Self {
96        Self {
97            inner: input,
98            state: Top,
99            settings,
100        }
101    }
102}
103
/// Returns early from the enclosing function with an I/O error of kind
/// `ErrorKind::InvalidData`.
///
/// Only usable inside functions whose return type is a `std::io::Result`.
macro_rules! invalid_data {
    () => {
        return Err(std::io::Error::from(ErrorKind::InvalidData))
    };
}
109
110impl<T> Read for StripComments<T>
111where
112    T: Read,
113{
114    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
115        let count = self.inner.read(buf)?;
116        if count > 0 {
117            strip_buf(&mut self.state, &mut buf[..count], self.settings, false)?;
118        } else if self.state != Top && self.state != InLineComment {
119            invalid_data!();
120        }
121        Ok(count)
122    }
123}
124
125fn consume_comment_whitespace_until_maybe_bracket(
126    state: &mut State,
127    buf: &mut [u8],
128    i: &mut usize,
129    settings: CommentSettings,
130) -> Result<bool> {
131    *i += 1;
132    while *i < buf.len() {
133        let c = &mut buf[*i];
134        *state = match state {
135            Top => {
136                *state = top(c, settings);
137                if c.is_ascii_whitespace() {
138                    *i += 1;
139                    continue;
140                }
141                return Ok(*c == b'}' || *c == b']');
142            }
143            InString => in_string(*c),
144            StringEscape => InString,
145            InComment => in_comment(c, settings)?,
146            InBlockComment => consume_block_comments(buf, i),
147            MaybeCommentEnd => maybe_comment_end(c),
148            InLineComment => consume_line_comments(buf, i),
149        };
150        *i += 1;
151    }
152    Ok(false)
153}
154
155fn strip_buf(
156    state: &mut State,
157    buf: &mut [u8],
158    settings: CommentSettings,
159    remove_trailing_commas: bool,
160) -> Result<()> {
161    let mut i = 0;
162    let len = buf.len();
163    while i < len {
164        let c = &mut buf[i];
165        if matches!(state, Top) {
166            let cur = i;
167            *state = top(c, settings);
168            if remove_trailing_commas
169                && *c == b','
170                && consume_comment_whitespace_until_maybe_bracket(state, buf, &mut i, settings)?
171            {
172                buf[cur] = b' ';
173            }
174        } else {
175            *state = match state {
176                Top => unreachable!(),
177                InString => in_string(*c),
178                StringEscape => InString,
179                InComment => in_comment(c, settings)?,
180                InBlockComment => consume_block_comments(buf, &mut i),
181                MaybeCommentEnd => maybe_comment_end(c),
182                InLineComment => consume_line_comments(buf, &mut i),
183            }
184        }
185        i += 1;
186    }
187    Ok(())
188}
189
190#[inline]
191fn consume_line_comments(buf: &mut [u8], i: &mut usize) -> State {
192    let cur = *i;
193    match memchr::memchr(b'\n', &buf[*i..]) {
194        Some(offset) => {
195            *i += offset;
196            buf[cur..*i].fill(b' ');
197            Top
198        }
199        None => {
200            *i = buf.len() - 1;
201            buf[cur..].fill(b' ');
202            InLineComment
203        }
204    }
205}
206
207#[inline]
208fn consume_block_comments(buf: &mut [u8], i: &mut usize) -> State {
209    let cur = *i;
210    match memchr::memchr(b'*', &buf[*i..]) {
211        Some(offset) => {
212            *i += offset;
213            buf[cur..=*i].fill(b' ');
214            MaybeCommentEnd
215        }
216        None => {
217            *i = buf.len() - 1;
218            buf[cur..].fill(b' ');
219            InBlockComment
220        }
221    }
222}
223
224/// Strips comments from a string in place, replacing it with whitespaces.
225///
226/// /// ## Example
227/// ```
228/// use json_strip_comments::{strip_comments_in_place, CommentSettings};
229///
230/// let mut string = String::from(r#"{
231/// // c line comment
232/// "a": "comment in string /* a */",
233/// ## shell line comment
234/// } /** end */"#);
235///
236/// strip_comments_in_place(&mut string, CommentSettings::default(), false).unwrap();
237///
238/// assert_eq!(string, "{
239///                  \n\"a\": \"comment in string /* a */\",
240///                     \n}           ");
241///
242/// ```
243pub fn strip_comments_in_place(
244    s: &mut str,
245    settings: CommentSettings,
246    remove_trailing_commas: bool,
247) -> Result<()> {
248    // Safety: we have made sure the text is UTF-8
249    strip_buf(
250        &mut Top,
251        unsafe { s.as_bytes_mut() },
252        settings,
253        remove_trailing_commas,
254    )
255}
256
257pub fn strip(s: &mut str) -> Result<()> {
258    strip_comments_in_place(s, CommentSettings::all(), true)
259}
260
/// Settings for `StripComments`: which comment styles are recognised.
///
/// By default every comment style is enabled.
#[derive(Debug, Clone, Copy)]
pub struct CommentSettings {
    /// Whether c-style block comments (`/* ... */`) are allowed.
    block_comments: bool,
    /// Whether c-style `//` line comments are allowed.
    slash_line_comments: bool,
    /// Whether shell-style `#` line comments are allowed.
    hash_line_comments: bool,
}
273
274impl Default for CommentSettings {
275    fn default() -> Self {
276        Self::all()
277    }
278}
279
280impl CommentSettings {
281    /// Enable all comment Styles
282    pub const fn all() -> Self {
283        Self {
284            block_comments: true,
285            slash_line_comments: true,
286            hash_line_comments: true,
287        }
288    }
289    /// Only allow line comments starting with `#`
290    pub const fn hash_only() -> Self {
291        Self {
292            hash_line_comments: true,
293            block_comments: false,
294            slash_line_comments: false,
295        }
296    }
297    /// Only allow "c-style" comments.
298    ///
299    /// Specifically, line comments beginning with `//` and
300    /// block comment like `/* ... */`.
301    pub const fn c_style() -> Self {
302        Self {
303            block_comments: true,
304            slash_line_comments: true,
305            hash_line_comments: false,
306        }
307    }
308
309    /// Create a new `StripComments` for `input`, using these settings.
310    ///
311    /// Transform `input` into a [`Read`] that strips out comments.
312    /// The types of comments to support are determined by the configuration of
313    /// `self`.
314    ///
315    /// ## Examples
316    ///
317    /// ```
318    /// use json_strip_comments::CommentSettings;
319    /// use std::io::Read;
320    ///
321    /// let input = r#"{
322    /// // c line comment
323    /// "a": "b"
324    /// /** multi line
325    /// comment
326    /// */ }"#;
327    ///
328    /// let mut stripped = String::new();
329    /// CommentSettings::c_style().strip_comments(input.as_bytes()).read_to_string(&mut stripped).unwrap();
330    ///
331    /// assert_eq!(stripped, "{
332    ///                  \n\"a\": \"b\"
333    ///                           }");
334    /// ```
335    ///
336    /// ```
337    /// use json_strip_comments::CommentSettings;
338    /// use std::io::Read;
339    ///
340    /// let input = r#"{
341    /// ## shell line comment
342    /// "a": "b"
343    /// }"#;
344    ///
345    /// let mut stripped = String::new();
346    /// CommentSettings::hash_only().strip_comments(input.as_bytes()).read_to_string(&mut stripped).unwrap();
347    ///
348    /// assert_eq!(stripped, "{
349    ///                     \n\"a\": \"b\"\n}");
350    /// ```
351    #[inline]
352    pub fn strip_comments<I: Read>(self, input: I) -> StripComments<I> {
353        StripComments::with_settings(self, input)
354    }
355}
356
357#[inline]
358fn top(c: &mut u8, settings: CommentSettings) -> State {
359    match *c {
360        b'"' => InString,
361        b'/' => {
362            *c = b' ';
363            InComment
364        }
365        b'#' if settings.hash_line_comments => {
366            *c = b' ';
367            InLineComment
368        }
369        _ => Top,
370    }
371}
372
373#[inline]
374fn in_string(c: u8) -> State {
375    match c {
376        b'"' => Top,
377        b'\\' => StringEscape,
378        _ => InString,
379    }
380}
381
382fn in_comment(c: &mut u8, settings: CommentSettings) -> Result<State> {
383    let new_state = match c {
384        b'*' if settings.block_comments => InBlockComment,
385        b'/' if settings.slash_line_comments => InLineComment,
386        _ => {
387            invalid_data!()
388        }
389    };
390    *c = b' ';
391    Ok(new_state)
392}
393
394fn maybe_comment_end(c: &mut u8) -> State {
395    let old = *c;
396    *c = b' ';
397    match old {
398        b'/' => Top,
399        b'*' => MaybeCommentEnd,
400        _ => InBlockComment,
401    }
402}
403
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{ErrorKind, Read};

    /// Streams `input` through a default `StripComments` and returns the stripped text,
    /// asserting along the way that the byte count is unchanged.
    fn strip_string(input: &str) -> String {
        let mut stripped = String::new();
        let bytes_read = StripComments::new(input.as_bytes())
            .read_to_string(&mut stripped)
            .unwrap();
        assert_eq!(bytes_read, input.len());
        stripped
    }

    /// Streams `input` through a reader built from `settings`, expects the read to fail,
    /// and returns the kind of the error.
    fn strip_error_kind(settings: CommentSettings, input: &str) -> ErrorKind {
        let mut stripped = String::new();
        settings
            .strip_comments(input.as_bytes())
            .read_to_string(&mut stripped)
            .unwrap_err()
            .kind()
    }

    #[test]
    fn block_comments() {
        assert_eq!(
            strip_string(r#"{/* Comment */"hi": /** abc */ "bye"}"#),
            r#"{             "hi":            "bye"}"#
        );
    }

    #[test]
    fn block_comments_with_possible_end() {
        assert_eq!(
            strip_string(r#"{/* Comment*PossibleEnd */"hi": /** abc */ "bye"}"#),
            r#"{                         "hi":            "bye"}"#
        );
    }

    // See https://github.com/tmccombs/json-comments-rs/issues/12
    // A block comment may end with more than one "*" before the closing "/".
    #[test]
    fn doc_comment() {
        assert_eq!(
            strip_string(r##"/** C **/ { "foo": 123 }"##),
            r##"          { "foo": 123 }"##
        );
    }

    #[test]
    fn line_comments() {
        let json = r#"{
            // line comment
            "a": 4,
            # another
        }"#;

        let expected = "{
                           \n            \"a\": 4,
                     \n        }";

        assert_eq!(strip_string(json), expected);
    }

    #[test]
    fn incomplete_string() {
        let kind = strip_error_kind(CommentSettings::default(), r#""foo"#);
        assert_eq!(kind, ErrorKind::InvalidData);
    }

    #[test]
    fn incomplete_comment() {
        let kind = strip_error_kind(CommentSettings::default(), "/* foo ");
        assert_eq!(kind, ErrorKind::InvalidData);
    }

    #[test]
    fn incomplete_comment2() {
        let kind = strip_error_kind(CommentSettings::default(), "/* foo *");
        assert_eq!(kind, ErrorKind::InvalidData);
    }

    #[test]
    fn no_hash_comments() {
        let json = r#"# bad comment
        {"a": "b"}"#;
        let mut stripped = String::new();
        CommentSettings::c_style()
            .strip_comments(json.as_bytes())
            .read_to_string(&mut stripped)
            .unwrap();
        // With hash comments disabled the input passes through untouched.
        assert_eq!(stripped, json);
    }

    #[test]
    fn no_slash_line_comments() {
        let json = r#"// bad comment
        {"a": "b"}"#;
        let kind = strip_error_kind(CommentSettings::hash_only(), json);
        assert_eq!(kind, ErrorKind::InvalidData);
    }

    #[test]
    fn no_block_comments() {
        let json = r#"/* bad comment */ {"a": "b"}"#;
        let kind = strip_error_kind(CommentSettings::hash_only(), json);
        assert_eq!(kind, ErrorKind::InvalidData);
    }

    #[test]
    fn strip_in_place() {
        let mut json = String::from(r#"{/* Comment */"hi": /** abc */ "bye"}"#);
        strip_comments_in_place(&mut json, CommentSettings::default(), false).unwrap();
        assert_eq!(json, r#"{             "hi":            "bye"}"#);
    }

    #[test]
    fn trailing_comma() {
        let mut json = String::from(
            r#"{
            "a1": [1,],
            "a2": [1,/* x */],
            "a3": [
                1, // x
            ],
            "o1": {v:1,},
            "o2": {v:1,/* x */},
            "o3": {
                "v":1, // x
            },
            # another
        }"#,
        );
        strip_comments_in_place(&mut json, CommentSettings::default(), true).unwrap();

        let expected = r#"{
            "a1": [1 ],
            "a2": [1        ],
            "a3": [
                1
            ],
            "o1": {v:1 },
            "o2": {v:1        },
            "o3": {
                "v":1
            }
        }"#;

        // Only the non-whitespace content is compared; exact spacing is covered elsewhere.
        let squash = |text: &str| text.replace(|ch: char| ch.is_ascii_whitespace(), "");
        assert_eq!(squash(&json), squash(expected));
    }
}
569        );
570    }
571}