json_strip_comments/
lib.rs

1//! Replace json comments and trailing commas in place.
2//!
3//! A fork of a fork:
4//!
5//! * <https://github.com/tmccombs/json-comments-rs>
6//! * <https://github.com/parcel-bundler/parcel/pull/9032>
7//!
//! `json-strip-comments` is a library to strip out comments from JSON. By processing text
//! through a [`StripComments`] adapter first, it is possible to use a standard JSON parser (such
//! as [serde_json](https://crates.io/crates/serde_json)) with quasi-json input that contains
//! comments.
12//!
13//! In fact, this code makes few assumptions about the input and could probably be used to strip
14//! comments out of other types of code as well, provided that strings use double quotes and
15//! backslashes are used for escapes in strings.
16//!
17//! The following types of comments are supported:
18//!   - C style block comments (`/* ... */`)
19//!   - C style line comments (`// ...`)
20//!   - Shell style line comments (`# ...`)
21//!
22//! ## Example
23//!
24//! ```rust
25#![doc = include_str!("../examples/example.rs")]
26//! ```
27
28use std::io::{ErrorKind, Read, Result};
29
/// Position of the scanner relative to strings and comments.
///
/// The value is carried from one byte to the next (and, for the streaming
/// reader, from one chunk to the next).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
enum State {
    /// Outside of any string or comment.
    Top = 0,
    /// Inside a double-quoted string.
    InString = 1,
    /// Just after a backslash inside a string; the next byte is taken verbatim.
    StringEscape = 2,
    /// Just after a `/` seen in [`State::Top`]; a `*` or `/` must follow.
    InComment = 3,
    /// Inside a `/* ... */` comment.
    InBlockComment = 4,
    /// Inside a block comment, just after a `*` that may start the closing `*/`.
    MaybeCommentEnd = 5,
    /// Inside a `//` or `#` comment, up to the next newline.
    InLineComment = 6,
}
41
42use State::{
43    InBlockComment, InComment, InLineComment, InString, MaybeCommentEnd, StringEscape, Top,
44};
45
46/// A [`Read`] that transforms another [`Read`] so that it changes all comments to spaces so that a downstream json parser
47/// (such as json-serde) doesn't choke on them.
48///
49/// The supported comments are:
50///   - C style block comments (`/* ... */`)
51///   - C style line comments (`// ...`)
52///   - Shell style line comments (`# ...`)
53///
54/// ## Example
55/// ```
56/// use json_strip_comments::StripComments;
57/// use std::io::Read;
58///
59/// let input = r#"{
60/// // c line comment
61/// "a": "comment in string /* a */",
62/// ## shell line comment
63/// } /** end */"#;
64///
65/// let mut stripped = String::new();
66/// StripComments::new(input.as_bytes()).read_to_string(&mut stripped).unwrap();
67///
68/// assert_eq!(stripped, "{
69///                  \n\"a\": \"comment in string /* a */\",
70///                     \n}           ");
71///
72/// ```
73///
74pub struct StripComments<T: Read> {
75    inner: T,
76    state: State,
77}
78
79impl<T> StripComments<T>
80where
81    T: Read,
82{
83    pub fn new(input: T) -> Self {
84        Self { inner: input, state: Top }
85    }
86}
87
88impl<T> Read for StripComments<T>
89where
90    T: Read,
91{
92    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
93        let count = self.inner.read(buf)?;
94        if count > 0 {
95            strip_buf(&mut self.state, &mut buf[..count])?;
96        } else if self.state != Top && self.state != InLineComment {
97            return Err(ErrorKind::InvalidData.into());
98        }
99        Ok(count)
100    }
101}
102
103/// Strips comments from a string in place, replacing it with whitespaces.
104///
105/// /// ## Example
106/// ```
107/// use json_strip_comments::strip_comments_in_place;
108///
109/// let mut string = String::from(r#"{
110/// // c line comment
111/// "a": "comment in string /* a */"
112/// ## shell line comment
113/// } /** end */"#);
114///
115/// strip_comments_in_place(&mut string).unwrap();
116///
117/// assert_eq!(string, "{
118///                  \n\"a\": \"comment in string /* a */\"
119///                     \n}           ");
120///
121/// ```
122#[inline]
123pub fn strip_comments_in_place(s: &mut str) -> Result<()> {
124    // Safety: we have made sure the text is UTF-8
125    strip_buf(&mut Top, unsafe { s.as_bytes_mut() })
126}
127
128#[inline]
129pub fn strip(s: &mut str) -> Result<()> {
130    strip_comments_in_place(s)
131}
132
133#[inline]
134pub fn strip_slice(s: &mut [u8]) -> Result<()> {
135    strip_buf(&mut Top, s)
136}
137
138fn consume_comment_whitespace_until_maybe_bracket(
139    state: &mut State,
140    buf: &mut [u8],
141    i: &mut usize,
142) -> Result<bool> {
143    *i += 1;
144    let len = buf.len();
145    while *i < len {
146        let c = &mut buf[*i];
147        *state = match state {
148            Top => {
149                *state = top(c);
150                if c.is_ascii_whitespace() {
151                    *i += 1;
152                    continue;
153                }
154                return Ok(*c == b'}' || *c == b']');
155            }
156            InString => in_string(*c),
157            StringEscape => InString,
158            InComment => in_comment(c)?,
159            InBlockComment => consume_block_comments(buf, i),
160            MaybeCommentEnd => maybe_comment_end(c),
161            InLineComment => consume_line_comments(buf, i),
162        };
163        *i += 1;
164    }
165    Ok(false)
166}
167
168fn strip_buf(state: &mut State, buf: &mut [u8]) -> Result<()> {
169    let mut i = 0;
170    let len = buf.len();
171
172    // Fast path for Top state which is most common
173    while i < len {
174        let c = &mut buf[i];
175
176        match state {
177            Top => {
178                let cur = i;
179                let new_state = top(c);
180                if *c == b',' {
181                    let mut temp_state = new_state;
182                    if consume_comment_whitespace_until_maybe_bracket(&mut temp_state, buf, &mut i)?
183                    {
184                        buf[cur] = b' ';
185                    }
186                    *state = temp_state;
187                } else {
188                    *state = new_state;
189                }
190            }
191            InString => *state = in_string(*c),
192            StringEscape => *state = InString,
193            InComment => *state = in_comment(c)?,
194            InBlockComment => *state = consume_block_comments(buf, &mut i),
195            MaybeCommentEnd => *state = maybe_comment_end(c),
196            InLineComment => *state = consume_line_comments(buf, &mut i),
197        }
198
199        i += 1;
200    }
201    Ok(())
202}
203
204#[inline(always)]
205fn consume_line_comments(buf: &mut [u8], i: &mut usize) -> State {
206    let cur = *i;
207    let remaining = &buf[*i..];
208    match memchr::memchr(b'\n', remaining) {
209        Some(offset) => {
210            *i += offset;
211            buf[cur..*i].fill(b' ');
212            Top
213        }
214        None => {
215            let len = buf.len();
216            *i = len - 1;
217            buf[cur..len].fill(b' ');
218            InLineComment
219        }
220    }
221}
222
223#[inline(always)]
224fn consume_block_comments(buf: &mut [u8], i: &mut usize) -> State {
225    let cur = *i;
226    let remaining = &buf[*i..];
227    match memchr::memchr(b'*', remaining) {
228        Some(offset) => {
229            *i += offset;
230            buf[cur..=*i].fill(b' ');
231            MaybeCommentEnd
232        }
233        None => {
234            let len = buf.len();
235            *i = len - 1;
236            buf[cur..len].fill(b' ');
237            InBlockComment
238        }
239    }
240}
241
242#[inline(always)]
243fn top(c: &mut u8) -> State {
244    match *c {
245        b'"' => InString,
246        b'/' => {
247            *c = b' ';
248            InComment
249        }
250        b'#' => {
251            *c = b' ';
252            InLineComment
253        }
254        _ => Top,
255    }
256}
257
258#[inline(always)]
259fn in_string(c: u8) -> State {
260    match c {
261        b'"' => Top,
262        b'\\' => StringEscape,
263        _ => InString,
264    }
265}
266
267#[inline]
268fn in_comment(c: &mut u8) -> Result<State> {
269    let new_state = match *c {
270        b'*' => InBlockComment,
271        b'/' => InLineComment,
272        _ => return Err(ErrorKind::InvalidData.into()),
273    };
274    *c = b' ';
275    Ok(new_state)
276}
277
278#[inline]
279fn maybe_comment_end(c: &mut u8) -> State {
280    let old = *c;
281    *c = b' ';
282    match old {
283        b'/' => Top,
284        b'*' => MaybeCommentEnd,
285        _ => InBlockComment,
286    }
287}