1#![doc = include_str!("../examples/example.rs")]
26use std::io::{ErrorKind, Read, Result};
29
/// Lexer state of the comment stripper, carried from one byte to the next.
///
/// The variant order is significant: with `#[repr(u8)]` each variant keeps its
/// positional discriminant.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// Outside of any string or comment.
    Top,
    /// Inside a double-quoted string.
    InString,
    /// Directly after a backslash inside a string; the next byte is taken verbatim.
    StringEscape,
    /// Directly after a `/` seen at the top level; a `*` or `/` must follow.
    InComment,
    /// Inside a `/* ... */` block comment.
    InBlockComment,
    /// Inside a block comment, directly after a `*` that may start the terminator.
    MaybeCommentEnd,
    /// Inside a `//` or `#` line comment.
    InLineComment,
}
41
42use State::{
43 InBlockComment, InComment, InLineComment, InString, MaybeCommentEnd, StringEscape, Top,
44};
45
46pub struct StripComments<T: Read> {
75 inner: T,
76 state: State,
77}
78
79impl<T> StripComments<T>
80where
81 T: Read,
82{
83 pub fn new(input: T) -> Self {
84 Self {
85 inner: input,
86 state: Top,
87 }
88 }
89}
90
91impl<T> Read for StripComments<T>
92where
93 T: Read,
94{
95 fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
96 let count = self.inner.read(buf)?;
97 if count > 0 {
98 strip_buf(&mut self.state, &mut buf[..count])?;
99 } else if self.state != Top && self.state != InLineComment {
100 return Err(ErrorKind::InvalidData.into());
101 }
102 Ok(count)
103 }
104}
105
106pub fn strip_comments_in_place(s: &mut str) -> Result<()> {
126 strip_buf(&mut Top, unsafe { s.as_bytes_mut() })
128}
129
130pub fn strip(s: &mut str) -> Result<()> {
131 strip_comments_in_place(s)
132}
133
134
135fn consume_comment_whitespace_until_maybe_bracket(
136 state: &mut State,
137 buf: &mut [u8],
138 i: &mut usize,
139) -> Result<bool> {
140 *i += 1;
141 let len = buf.len();
142 while *i < len {
143 let c = &mut buf[*i];
144 *state = match state {
145 Top => {
146 *state = top(c);
147 if c.is_ascii_whitespace() {
148 *i += 1;
149 continue;
150 }
151 return Ok(*c == b'}' || *c == b']');
152 }
153 InString => in_string(*c),
154 StringEscape => InString,
155 InComment => in_comment(c)?,
156 InBlockComment => consume_block_comments(buf, i),
157 MaybeCommentEnd => maybe_comment_end(c),
158 InLineComment => consume_line_comments(buf, i),
159 };
160 *i += 1;
161 }
162 Ok(false)
163}
164
165fn strip_buf(state: &mut State, buf: &mut [u8]) -> Result<()> {
166 let mut i = 0;
167 let len = buf.len();
168
169 while i < len {
171 let c = &mut buf[i];
172
173 match state {
174 Top => {
175 let cur = i;
176 let new_state = top(c);
177 if *c == b',' {
178 let mut temp_state = new_state;
179 if consume_comment_whitespace_until_maybe_bracket(&mut temp_state, buf, &mut i)? {
180 buf[cur] = b' ';
181 }
182 *state = temp_state;
183 } else {
184 *state = new_state;
185 }
186 }
187 InString => *state = in_string(*c),
188 StringEscape => *state = InString,
189 InComment => *state = in_comment(c)?,
190 InBlockComment => *state = consume_block_comments(buf, &mut i),
191 MaybeCommentEnd => *state = maybe_comment_end(c),
192 InLineComment => *state = consume_line_comments(buf, &mut i),
193 }
194
195 i += 1;
196 }
197 Ok(())
198}
199
200#[inline(always)]
201fn consume_line_comments(buf: &mut [u8], i: &mut usize) -> State {
202 let cur = *i;
203 let remaining = &buf[*i..];
204 match memchr::memchr(b'\n', remaining) {
205 Some(offset) => {
206 *i += offset;
207 buf[cur..*i].fill(b' ');
208 Top
209 }
210 None => {
211 let len = buf.len();
212 *i = len - 1;
213 buf[cur..len].fill(b' ');
214 InLineComment
215 }
216 }
217}
218
219#[inline(always)]
220fn consume_block_comments(buf: &mut [u8], i: &mut usize) -> State {
221 let cur = *i;
222 let remaining = &buf[*i..];
223 match memchr::memchr(b'*', remaining) {
224 Some(offset) => {
225 *i += offset;
226 buf[cur..=*i].fill(b' ');
227 MaybeCommentEnd
228 }
229 None => {
230 let len = buf.len();
231 *i = len - 1;
232 buf[cur..len].fill(b' ');
233 InBlockComment
234 }
235 }
236}
237
238#[inline(always)]
239fn top(c: &mut u8) -> State {
240 match *c {
241 b'"' => InString,
242 b'/' => {
243 *c = b' ';
244 InComment
245 }
246 b'#' => {
247 *c = b' ';
248 InLineComment
249 }
250 _ => Top,
251 }
252}
253
254#[inline(always)]
255fn in_string(c: u8) -> State {
256 match c {
257 b'"' => Top,
258 b'\\' => StringEscape,
259 _ => InString,
260 }
261}
262
263#[inline]
264fn in_comment(c: &mut u8) -> Result<State> {
265 let new_state = match *c {
266 b'*' => InBlockComment,
267 b'/' => InLineComment,
268 _ => return Err(ErrorKind::InvalidData.into()),
269 };
270 *c = b' ';
271 Ok(new_state)
272}
273
274#[inline]
275fn maybe_comment_end(c: &mut u8) -> State {
276 let old = *c;
277 *c = b' ';
278 match old {
279 b'/' => Top,
280 b'*' => MaybeCommentEnd,
281 _ => InBlockComment,
282 }
283}
284
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{ErrorKind, Read};

    /// The run of spaces a stripped `comment` leaves behind: one space per byte.
    fn blanks(comment: &str) -> String {
        " ".repeat(comment.len())
    }

    /// Runs `input` through the streaming reader and returns the stripped text.
    fn strip_string(input: &str) -> String {
        let mut out = String::new();
        let count = StripComments::new(input.as_bytes())
            .read_to_string(&mut out)
            .unwrap();
        // Stripping overwrites bytes in place, so the length never changes.
        assert_eq!(count, input.len());
        out
    }

    /// Runs `input` through the streaming reader and returns the kind of the error
    /// it must fail with.
    fn strip_error_kind(input: &str) -> ErrorKind {
        let mut stripped = String::new();
        StripComments::new(input.as_bytes())
            .read_to_string(&mut stripped)
            .unwrap_err()
            .kind()
    }

    #[test]
    fn block_comments() {
        let json = r#"{/* Comment */"hi": /** abc */ "bye"}"#;
        let expected = format!(
            r#"{{{}"hi": {} "bye"}}"#,
            blanks("/* Comment */"),
            blanks("/** abc */"),
        );
        assert_eq!(strip_string(json), expected);
    }

    #[test]
    fn block_comments_with_possible_end() {
        let json = r#"{/* Comment*PossibleEnd */"hi": /** abc */ "bye"}"#;
        let expected = format!(
            r#"{{{}"hi": {} "bye"}}"#,
            blanks("/* Comment*PossibleEnd */"),
            blanks("/** abc */"),
        );
        assert_eq!(strip_string(json), expected);
    }

    #[test]
    fn doc_comment() {
        let json = r##"/** C **/ { "foo": 123 }"##;
        let expected = format!(r##"{} {{ "foo": 123 }}"##, blanks("/** C **/"));
        assert_eq!(strip_string(json), expected);
    }

    #[test]
    fn line_comments() {
        // The comma is followed by another member on purpose: this test is about line
        // comments and must not depend on how trailing commas are treated.
        let json = "{\n // line comment\n \"a\": 4,\n # another\n \"b\": 5\n }";
        let expected = format!(
            "{{\n {}\n \"a\": 4,\n {}\n \"b\": 5\n }}",
            blanks("// line comment"),
            blanks("# another"),
        );
        assert_eq!(strip_string(json), expected);
    }

    #[test]
    fn comment_markers_inside_strings_are_kept() {
        let json = r#"{"url": "http://x/*y*/", "esc": "a\"//b#c"}"#;
        assert_eq!(strip_string(json), json);
    }

    #[test]
    fn incomplete_string() {
        assert_eq!(strip_error_kind(r#""foo"#), ErrorKind::InvalidData);
    }

    #[test]
    fn incomplete_comment() {
        assert_eq!(strip_error_kind("/* foo "), ErrorKind::InvalidData);
    }

    #[test]
    fn incomplete_comment2() {
        assert_eq!(strip_error_kind("/* foo *"), ErrorKind::InvalidData);
    }

    #[test]
    fn invalid_comment_start() {
        assert_eq!(strip_error_kind("{ /x }"), ErrorKind::InvalidData);
    }

    #[test]
    fn unterminated_line_comment_is_accepted() {
        let json = "{} // no newline at the end";
        let expected = format!("{{}} {}", blanks("// no newline at the end"));
        assert_eq!(strip_string(json), expected);
    }

    #[test]
    fn strip_in_place() {
        let mut json = String::from(r#"{/* Comment */"hi": /** abc */ "bye"}"#);
        strip_comments_in_place(&mut json).unwrap();
        let expected = format!(
            r#"{{{}"hi": {} "bye"}}"#,
            blanks("/* Comment */"),
            blanks("/** abc */"),
        );
        assert_eq!(json, expected);
    }

    #[test]
    fn trailing_comma() {
        let mut json = String::from(
            r#"{
            "a1": [1,],
            "a2": [1,/* x */],
            "a3": [
                1, // x
            ],
            "o1": {v:1,},
            "o2": {v:1,/* x */},
            "o3": {
                "v":1, // x
            },
            # another
        }"#,
        );
        let original_len = json.len();
        strip_comments_in_place(&mut json).unwrap();
        assert_eq!(json.len(), original_len);

        let expected = r#"{
            "a1": [1 ],
            "a2": [1 ],
            "a3": [
                1
            ],
            "o1": {v:1 },
            "o2": {v:1 },
            "o3": {
                "v":1
            }
        }"#;

        // Only the significant bytes are compared; the exact amount of blank padding
        // is covered by the exact-match tests above.
        assert_eq!(
            json.replace(|s: char| s.is_ascii_whitespace(), ""),
            expected.replace(|s: char| s.is_ascii_whitespace(), "")
        );
    }
}