1#![doc = include_str!("../examples/example.rs")]
26use std::io::{ErrorKind, Read, Result};
29
/// Position of the scanner within the input being stripped.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// Outside of any string or comment.
    Top,
    /// Inside a double-quoted string.
    InString,
    /// Right after a backslash inside a string; the next byte is skipped verbatim.
    StringEscape,
    /// A `/` was seen at the top level; the next byte selects the comment kind.
    InComment,
    /// Inside a `/* ... */` comment.
    InBlockComment,
    /// Inside a block comment, right after a `*` that may start the closing `*/`.
    MaybeCommentEnd,
    /// Inside a `//` or `#` comment, which runs up to the next newline.
    InLineComment,
}
40
41use State::{
42 InBlockComment, InComment, InLineComment, InString, MaybeCommentEnd, StringEscape, Top,
43};
44
45pub struct StripComments<T: Read> {
74 inner: T,
75 state: State,
76 settings: CommentSettings,
77}
78
79impl<T> StripComments<T>
80where
81 T: Read,
82{
83 pub fn new(input: T) -> Self {
84 Self {
85 inner: input,
86 state: Top,
87 settings: CommentSettings::default(),
88 }
89 }
90
91 #[inline]
95 pub fn with_settings(settings: CommentSettings, input: T) -> Self {
96 Self {
97 inner: input,
98 state: Top,
99 settings,
100 }
101 }
102}
103
/// Returns early from the enclosing function with an
/// [`ErrorKind::InvalidData`](std::io::ErrorKind::InvalidData) error.
///
/// The expansion is a `return` expression, so it can be used both as a
/// statement and in expression position (for example as a `match` arm).
macro_rules! invalid_data {
    () => {
        return ::std::result::Result::Err(::std::convert::Into::into(
            ::std::io::ErrorKind::InvalidData,
        ))
    };
}
109
110impl<T> Read for StripComments<T>
111where
112 T: Read,
113{
114 fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
115 let count = self.inner.read(buf)?;
116 if count > 0 {
117 strip_buf(&mut self.state, &mut buf[..count], self.settings, false)?;
118 } else if self.state != Top && self.state != InLineComment {
119 invalid_data!();
120 }
121 Ok(count)
122 }
123}
124
125fn consume_comment_whitespace_until_maybe_bracket(
126 state: &mut State,
127 buf: &mut [u8],
128 i: &mut usize,
129 settings: CommentSettings,
130) -> Result<bool> {
131 *i += 1;
132 while *i < buf.len() {
133 let c = &mut buf[*i];
134 *state = match state {
135 Top => {
136 *state = top(c, settings);
137 if c.is_ascii_whitespace() {
138 *i += 1;
139 continue;
140 }
141 return Ok(*c == b'}' || *c == b']');
142 }
143 InString => in_string(*c),
144 StringEscape => InString,
145 InComment => in_comment(c, settings)?,
146 InBlockComment => consume_block_comments(buf, i),
147 MaybeCommentEnd => maybe_comment_end(c),
148 InLineComment => consume_line_comments(buf, i),
149 };
150 *i += 1;
151 }
152 Ok(false)
153}
154
155fn strip_buf(
156 state: &mut State,
157 buf: &mut [u8],
158 settings: CommentSettings,
159 remove_trailing_commas: bool,
160) -> Result<()> {
161 let mut i = 0;
162 let len = buf.len();
163 while i < len {
164 let c = &mut buf[i];
165 if matches!(state, Top) {
166 let cur = i;
167 *state = top(c, settings);
168 if remove_trailing_commas
169 && *c == b','
170 && consume_comment_whitespace_until_maybe_bracket(state, buf, &mut i, settings)?
171 {
172 buf[cur] = b' ';
173 }
174 } else {
175 *state = match state {
176 Top => unreachable!(),
177 InString => in_string(*c),
178 StringEscape => InString,
179 InComment => in_comment(c, settings)?,
180 InBlockComment => consume_block_comments(buf, &mut i),
181 MaybeCommentEnd => maybe_comment_end(c),
182 InLineComment => consume_line_comments(buf, &mut i),
183 }
184 }
185 i += 1;
186 }
187 Ok(())
188}
189
190#[inline]
191fn consume_line_comments(buf: &mut [u8], i: &mut usize) -> State {
192 let cur = *i;
193 match memchr::memchr(b'\n', &buf[*i..]) {
194 Some(offset) => {
195 *i += offset;
196 buf[cur..*i].fill(b' ');
197 Top
198 }
199 None => {
200 *i = buf.len() - 1;
201 buf[cur..].fill(b' ');
202 InLineComment
203 }
204 }
205}
206
207#[inline]
208fn consume_block_comments(buf: &mut [u8], i: &mut usize) -> State {
209 let cur = *i;
210 match memchr::memchr(b'*', &buf[*i..]) {
211 Some(offset) => {
212 *i += offset;
213 buf[cur..=*i].fill(b' ');
214 MaybeCommentEnd
215 }
216 None => {
217 *i = buf.len() - 1;
218 buf[cur..].fill(b' ');
219 InBlockComment
220 }
221 }
222}
223
224pub fn strip_comments_in_place(
244 s: &mut str,
245 settings: CommentSettings,
246 remove_trailing_commas: bool,
247) -> Result<()> {
248 strip_buf(
250 &mut Top,
251 unsafe { s.as_bytes_mut() },
252 settings,
253 remove_trailing_commas,
254 )
255}
256
257pub fn strip(s: &mut str) -> Result<()> {
258 strip_comments_in_place(s, CommentSettings::all(), true)
259}
260
/// Selects which comment syntaxes are recognised while stripping.
#[derive(Debug, Clone, Copy)]
pub struct CommentSettings {
    /// Recognise `/* ... */` block comments.
    block_comments: bool,
    /// Recognise `// ...` line comments.
    slash_line_comments: bool,
    /// Recognise `# ...` line comments.
    hash_line_comments: bool,
}
273
274impl Default for CommentSettings {
275 fn default() -> Self {
276 Self::all()
277 }
278}
279
280impl CommentSettings {
281 pub const fn all() -> Self {
283 Self {
284 block_comments: true,
285 slash_line_comments: true,
286 hash_line_comments: true,
287 }
288 }
289 pub const fn hash_only() -> Self {
291 Self {
292 hash_line_comments: true,
293 block_comments: false,
294 slash_line_comments: false,
295 }
296 }
297 pub const fn c_style() -> Self {
302 Self {
303 block_comments: true,
304 slash_line_comments: true,
305 hash_line_comments: false,
306 }
307 }
308
309 #[inline]
352 pub fn strip_comments<I: Read>(self, input: I) -> StripComments<I> {
353 StripComments::with_settings(self, input)
354 }
355}
356
357#[inline]
358fn top(c: &mut u8, settings: CommentSettings) -> State {
359 match *c {
360 b'"' => InString,
361 b'/' => {
362 *c = b' ';
363 InComment
364 }
365 b'#' if settings.hash_line_comments => {
366 *c = b' ';
367 InLineComment
368 }
369 _ => Top,
370 }
371}
372
373#[inline]
374fn in_string(c: u8) -> State {
375 match c {
376 b'"' => Top,
377 b'\\' => StringEscape,
378 _ => InString,
379 }
380}
381
382fn in_comment(c: &mut u8, settings: CommentSettings) -> Result<State> {
383 let new_state = match c {
384 b'*' if settings.block_comments => InBlockComment,
385 b'/' if settings.slash_line_comments => InLineComment,
386 _ => {
387 invalid_data!()
388 }
389 };
390 *c = b' ';
391 Ok(new_state)
392}
393
394fn maybe_comment_end(c: &mut u8) -> State {
395 let old = *c;
396 *c = b' ';
397 match old {
398 b'/' => Top,
399 b'*' => MaybeCommentEnd,
400 _ => InBlockComment,
401 }
402}
403
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{ErrorKind, Read};

    /// Reads all of `input` through a default `StripComments` reader and
    /// checks that the byte count is unchanged by stripping.
    fn strip_string(input: &str) -> String {
        let mut out = String::new();
        let count = StripComments::new(input.as_bytes())
            .read_to_string(&mut out)
            .unwrap();
        assert_eq!(count, input.len());
        out
    }

    /// What a stripped span turns into: one space per byte of `text`.
    ///
    /// Expected outputs are built with this helper instead of literal runs of
    /// spaces so they cannot be broken by whitespace reformatting.
    fn blanked(text: &str) -> String {
        " ".repeat(text.len())
    }

    /// Drains `reader` expecting a failure and returns the error's kind.
    fn read_error_kind<R: Read>(mut reader: R) -> ErrorKind {
        let mut sink = String::new();
        reader.read_to_string(&mut sink).unwrap_err().kind()
    }

    #[test]
    fn block_comments() {
        let json = r#"{/* Comment */"hi": /** abc */ "bye"}"#;
        // `{{` and `}}` are literal braces in the format string.
        let expected = format!(
            r#"{{{}"hi": {} "bye"}}"#,
            blanked("/* Comment */"),
            blanked("/** abc */"),
        );
        assert_eq!(strip_string(json), expected);
    }

    #[test]
    fn block_comments_with_possible_end() {
        let json = r#"{/* Comment*PossibleEnd */"hi": /** abc */ "bye"}"#;
        let expected = format!(
            r#"{{{}"hi": {} "bye"}}"#,
            blanked("/* Comment*PossibleEnd */"),
            blanked("/** abc */"),
        );
        assert_eq!(strip_string(json), expected);
    }

    #[test]
    fn doc_comment() {
        let json = r##"/** C **/ { "foo": 123 }"##;
        let expected = format!(r#"{} {{ "foo": 123 }}"#, blanked("/** C **/"));
        assert_eq!(strip_string(json), expected);
    }

    #[test]
    fn line_comments() {
        let json = "{\n    // line comment\n    \"a\": 4,\n    # another\n}";
        // A line holding only a comment becomes all spaces; newlines survive.
        let expected = format!(
            "{{\n{}\n    \"a\": 4,\n{}\n}}",
            blanked("    // line comment"),
            blanked("    # another"),
        );
        assert_eq!(strip_string(json), expected);
    }

    #[test]
    fn incomplete_string() {
        let json = r#""foo"#;
        assert_eq!(
            read_error_kind(StripComments::new(json.as_bytes())),
            ErrorKind::InvalidData
        );
    }

    #[test]
    fn incomplete_comment() {
        let json = "/* foo ";
        assert_eq!(
            read_error_kind(StripComments::new(json.as_bytes())),
            ErrorKind::InvalidData
        );
    }

    #[test]
    fn incomplete_comment2() {
        let json = "/* foo *";
        assert_eq!(
            read_error_kind(StripComments::new(json.as_bytes())),
            ErrorKind::InvalidData
        );
    }

    #[test]
    fn no_hash_comments() {
        let json = "# bad comment\n{\"a\": \"b\"}";
        let mut stripped = String::new();
        CommentSettings::c_style()
            .strip_comments(json.as_bytes())
            .read_to_string(&mut stripped)
            .unwrap();
        // With `#` comments disabled the input passes through untouched.
        assert_eq!(stripped, json);
    }

    #[test]
    fn no_slash_line_comments() {
        let json = "// bad comment\n{\"a\": \"b\"}";
        let reader = CommentSettings::hash_only().strip_comments(json.as_bytes());
        assert_eq!(read_error_kind(reader), ErrorKind::InvalidData);
    }

    #[test]
    fn no_block_comments() {
        let json = r#"/* bad comment */ {"a": "b"}"#;
        let reader = CommentSettings::hash_only().strip_comments(json.as_bytes());
        assert_eq!(read_error_kind(reader), ErrorKind::InvalidData);
    }

    #[test]
    fn strip_in_place() {
        let mut json = String::from(r#"{/* Comment */"hi": /** abc */ "bye"}"#);
        strip_comments_in_place(&mut json, CommentSettings::default(), false).unwrap();
        let expected = format!(
            r#"{{{}"hi": {} "bye"}}"#,
            blanked("/* Comment */"),
            blanked("/** abc */"),
        );
        assert_eq!(json, expected);
    }

    #[test]
    fn trailing_comma() {
        let mut json = String::from(
            r#"{
            "a1": [1,],
            "a2": [1,/* x */],
            "a3": [
                1, // x
            ],
            "o1": {v:1,},
            "o2": {v:1,/* x */},
            "o3": {
                "v":1, // x
            },
            # another
        }"#,
        );
        strip_comments_in_place(&mut json, CommentSettings::default(), true).unwrap();

        let expected = r#"{
            "a1": [1 ],
            "a2": [1 ],
            "a3": [
                1
            ],
            "o1": {v:1 },
            "o2": {v:1 },
            "o3": {
                "v":1
            }
        }"#;

        // Compared with all whitespace removed: only the surviving tokens
        // matter here, not the exact number of blanks left behind.
        assert_eq!(
            json.replace(|s: char| s.is_ascii_whitespace(), ""),
            expected.replace(|s: char| s.is_ascii_whitespace(), "")
        );
    }
}