json_strip_comments/
lib.rs1#![doc = include_str!("../examples/example.rs")]
26use std::io::{ErrorKind, Read, Result};
29
/// Position of the scanner relative to JSON strings and comments.
///
/// The stripper is a byte-at-a-time state machine; this is the state carried
/// from one byte (and, for the streaming reader, from one `read` call) to the
/// next.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
enum State {
    /// Outside of any string or comment.
    Top,
    /// Inside a double-quoted string.
    InString,
    /// Just after a backslash inside a string; the next byte is skipped and
    /// the scanner returns to `InString`.
    StringEscape,
    /// Just after a `/` seen at top level; the next byte must be `/` or `*`.
    InComment,
    /// Inside a `/* ... */` block comment.
    InBlockComment,
    /// Inside a block comment, just after a `*` that may start the closing `*/`.
    MaybeCommentEnd,
    /// Inside a `//` or `#` line comment, which runs up to the next newline.
    InLineComment,
}
41
42use State::{
43 InBlockComment, InComment, InLineComment, InString, MaybeCommentEnd, StringEscape, Top,
44};
45
46pub struct StripComments<T: Read> {
75 inner: T,
76 state: State,
77}
78
79impl<T> StripComments<T>
80where
81 T: Read,
82{
83 pub fn new(input: T) -> Self {
84 Self { inner: input, state: Top }
85 }
86}
87
88impl<T> Read for StripComments<T>
89where
90 T: Read,
91{
92 fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
93 let count = self.inner.read(buf)?;
94 if count > 0 {
95 strip_buf(&mut self.state, &mut buf[..count])?;
96 } else if self.state != Top && self.state != InLineComment {
97 return Err(ErrorKind::InvalidData.into());
98 }
99 Ok(count)
100 }
101}
102
103#[inline]
123pub fn strip_comments_in_place(s: &mut str) -> Result<()> {
124 strip_buf(&mut Top, unsafe { s.as_bytes_mut() })
126}
127
128#[inline]
129pub fn strip(s: &mut str) -> Result<()> {
130 strip_comments_in_place(s)
131}
132
133#[inline]
134pub fn strip_slice(s: &mut [u8]) -> Result<()> {
135 strip_buf(&mut Top, s)
136}
137
138fn consume_comment_whitespace_until_maybe_bracket(
139 state: &mut State,
140 buf: &mut [u8],
141 i: &mut usize,
142) -> Result<bool> {
143 *i += 1;
144 let len = buf.len();
145 while *i < len {
146 let c = &mut buf[*i];
147 *state = match state {
148 Top => {
149 *state = top(c);
150 if c.is_ascii_whitespace() {
151 *i += 1;
152 continue;
153 }
154 return Ok(*c == b'}' || *c == b']');
155 }
156 InString => in_string(*c),
157 StringEscape => InString,
158 InComment => in_comment(c)?,
159 InBlockComment => consume_block_comments(buf, i),
160 MaybeCommentEnd => maybe_comment_end(c),
161 InLineComment => consume_line_comments(buf, i),
162 };
163 *i += 1;
164 }
165 Ok(false)
166}
167
168fn strip_buf(state: &mut State, buf: &mut [u8]) -> Result<()> {
169 let mut i = 0;
170 let len = buf.len();
171
172 while i < len {
174 let c = &mut buf[i];
175
176 match state {
177 Top => {
178 let cur = i;
179 let new_state = top(c);
180 if *c == b',' {
181 let mut temp_state = new_state;
182 if consume_comment_whitespace_until_maybe_bracket(&mut temp_state, buf, &mut i)?
183 {
184 buf[cur] = b' ';
185 }
186 *state = temp_state;
187 } else {
188 *state = new_state;
189 }
190 }
191 InString => *state = in_string(*c),
192 StringEscape => *state = InString,
193 InComment => *state = in_comment(c)?,
194 InBlockComment => *state = consume_block_comments(buf, &mut i),
195 MaybeCommentEnd => *state = maybe_comment_end(c),
196 InLineComment => *state = consume_line_comments(buf, &mut i),
197 }
198
199 i += 1;
200 }
201 Ok(())
202}
203
204#[inline(always)]
205fn consume_line_comments(buf: &mut [u8], i: &mut usize) -> State {
206 let cur = *i;
207 let remaining = &buf[*i..];
208 match memchr::memchr(b'\n', remaining) {
209 Some(offset) => {
210 *i += offset;
211 buf[cur..*i].fill(b' ');
212 Top
213 }
214 None => {
215 let len = buf.len();
216 *i = len - 1;
217 buf[cur..len].fill(b' ');
218 InLineComment
219 }
220 }
221}
222
223#[inline(always)]
224fn consume_block_comments(buf: &mut [u8], i: &mut usize) -> State {
225 let cur = *i;
226 let remaining = &buf[*i..];
227 match memchr::memchr(b'*', remaining) {
228 Some(offset) => {
229 *i += offset;
230 buf[cur..=*i].fill(b' ');
231 MaybeCommentEnd
232 }
233 None => {
234 let len = buf.len();
235 *i = len - 1;
236 buf[cur..len].fill(b' ');
237 InBlockComment
238 }
239 }
240}
241
242#[inline(always)]
243fn top(c: &mut u8) -> State {
244 match *c {
245 b'"' => InString,
246 b'/' => {
247 *c = b' ';
248 InComment
249 }
250 b'#' => {
251 *c = b' ';
252 InLineComment
253 }
254 _ => Top,
255 }
256}
257
258#[inline(always)]
259fn in_string(c: u8) -> State {
260 match c {
261 b'"' => Top,
262 b'\\' => StringEscape,
263 _ => InString,
264 }
265}
266
267#[inline]
268fn in_comment(c: &mut u8) -> Result<State> {
269 let new_state = match *c {
270 b'*' => InBlockComment,
271 b'/' => InLineComment,
272 _ => return Err(ErrorKind::InvalidData.into()),
273 };
274 *c = b' ';
275 Ok(new_state)
276}
277
278#[inline]
279fn maybe_comment_end(c: &mut u8) -> State {
280 let old = *c;
281 *c = b' ';
282 match old {
283 b'/' => Top,
284 b'*' => MaybeCommentEnd,
285 _ => InBlockComment,
286 }
287}