use std::io::{ErrorKind, Read, Result};
/// Position of the comment-stripping scanner within the input stream.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// Outside of any string or comment.
    Top,
    /// Inside a double-quoted string.
    InString,
    /// Directly after a backslash inside a string; the next byte is skipped.
    StringEscape,
    /// Directly after a `/` seen at top level; the next byte decides the comment kind.
    InComment,
    /// Inside a `/* ... */` comment.
    InBlockComment,
    /// Inside a block comment, directly after a `*` that may begin the closing `*/`.
    MaybeCommentEnd,
    /// Inside a line comment, which runs until the next `\n`.
    InLineComment,
}
use State::*;
/// A [`Read`] adapter that overwrites comments in the wrapped reader's output
/// with spaces, leaving every other byte (and the total length) untouched.
pub struct StripComments<T>
where
    T: Read,
{
    /// The reader the raw bytes are pulled from.
    inner: T,
    /// Scanner state carried over between successive `read` calls.
    state: State,
    /// Which comment syntaxes are recognised.
    settings: CommentSettings,
}
impl<T> StripComments<T>
where
T: Read,
{
pub fn new(input: T) -> Self {
Self {
inner: input,
state: Top,
settings: CommentSettings::default(),
}
}
#[inline]
pub fn with_settings(settings: CommentSettings, input: T) -> Self {
Self {
inner: input,
state: Top,
settings,
}
}
}
/// Returns early from the enclosing function with an `ErrorKind::InvalidData`
/// error.
///
/// The error value is built through `From<ErrorKind>`, so the enclosing
/// function's error type must provide that conversion.
macro_rules! invalid_data {
    () => {
        return Err(From::from(ErrorKind::InvalidData))
    };
}
impl<T> Read for StripComments<T>
where
    T: Read,
{
    /// Reads from the inner reader into `buf`, then overwrites every comment
    /// byte in the freshly read region with a space.
    ///
    /// The returned count is exactly what the inner reader reported, so the
    /// output has the same length as the input. Scanner state is kept between
    /// calls, which lets strings and comments span multiple reads.
    ///
    /// # Errors
    ///
    /// * Any error produced by the inner reader is passed through.
    /// * `ErrorKind::InvalidData` if a top-level `/` is followed by a byte
    ///   that does not start an enabled comment style.
    /// * `ErrorKind::InvalidData` if the inner reader reports end of input
    ///   while the scanner is anywhere other than top level or a line comment
    ///   (for example inside an unterminated string or block comment).
    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
        let count = self.inner.read(buf)?;
        if count > 0 {
            for c in &mut buf[..count] {
                self.state = match self.state {
                    Top => top(c, &self.settings),
                    InString => in_string(*c),
                    // Whatever follows a backslash is part of the string.
                    StringEscape => InString,
                    InComment => in_comment(c, &self.settings)?,
                    InBlockComment => in_block_comment(c),
                    MaybeCommentEnd => maybe_comment_end(c),
                    InLineComment => in_line_comment(c),
                }
            }
        } else if !buf.is_empty() && self.state != Top && self.state != InLineComment {
            // A zero-byte read only signals end of input when the caller
            // actually offered space; `Read::read` also returns 0 for an empty
            // buffer, and that must not be reported as truncated input.
            invalid_data!();
        }
        Ok(count)
    }
}
/// Selects which comment syntaxes the stripper recognises.
#[derive(Debug, Clone, Copy)]
pub struct CommentSettings {
    /// Recognise `/* ... */` block comments.
    block_comments: bool,
    /// Recognise `// ...` comments that run to the end of the line.
    slash_line_comments: bool,
    /// Recognise `# ...` comments that run to the end of the line.
    hash_line_comments: bool,
}
impl Default for CommentSettings {
fn default() -> Self {
Self::all()
}
}
impl CommentSettings {
pub const fn all() -> Self {
Self {
block_comments: true,
slash_line_comments: true,
hash_line_comments: true,
}
}
pub const fn hash_only() -> Self {
Self {
hash_line_comments: true,
block_comments: false,
slash_line_comments: false,
}
}
pub const fn c_style() -> Self {
Self {
block_comments: true,
slash_line_comments: true,
hash_line_comments: false,
}
}
#[inline]
pub fn strip_comments<I: Read>(self, input: I) -> StripComments<I> {
StripComments::with_settings(self, input)
}
}
/// Handles one byte while outside any string or comment.
///
/// A `"` opens a string. A `/` is always treated as the possible start of a
/// comment, and a `#` opens a line comment when hash comments are enabled; in
/// both of those cases the byte is overwritten with a space. Every other byte
/// is left alone.
fn top(c: &mut u8, settings: &CommentSettings) -> State {
    let byte = *c;
    if byte == b'"' {
        return InString;
    }
    let next = if byte == b'/' {
        InComment
    } else if byte == b'#' && settings.hash_line_comments {
        InLineComment
    } else {
        return Top;
    };
    // Only comment-introducing bytes reach this point.
    *c = b' ';
    next
}
/// Handles one byte while inside a string: an unescaped `"` closes the
/// string, a backslash escapes the following byte, anything else stays put.
fn in_string(c: u8) -> State {
    if c == b'"' {
        Top
    } else if c == b'\\' {
        StringEscape
    } else {
        InString
    }
}
fn in_comment(c: &mut u8, settings: &CommentSettings) -> Result<State> {
let new_state = match c {
b'*' if settings.block_comments => InBlockComment,
b'/' if settings.slash_line_comments => InLineComment,
_ => invalid_data!(),
};
*c = b' ';
Ok(new_state)
}
/// Handles one byte inside a block comment: the byte is always blanked, and a
/// `*` additionally arms the check for the closing `*/`.
fn in_block_comment(c: &mut u8) -> State {
    let saw_star = *c == b'*';
    *c = b' ';
    if saw_star {
        MaybeCommentEnd
    } else {
        InBlockComment
    }
}
/// Handles the byte that follows a `*` inside a block comment.
///
/// `/` closes the comment, another `*` keeps the closing check armed, and any
/// other byte falls back to the comment body. The byte is blanked in every
/// case.
fn maybe_comment_end(c: &mut u8) -> State {
    let next = match *c {
        b'/' => Top,
        b'*' => MaybeCommentEnd,
        _ => InBlockComment,
    };
    *c = b' ';
    next
}
/// Handles one byte inside a line comment: a `\n` ends the comment and is
/// kept as-is, every other byte is blanked.
fn in_line_comment(c: &mut u8) -> State {
    match *c {
        b'\n' => Top,
        _ => {
            *c = b' ';
            InLineComment
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{ErrorKind, Read};

    /// Runs `input` through a default `StripComments` and returns the output,
    /// checking that stripping did not change the number of bytes.
    fn strip_string(input: &str) -> String {
        let mut out = String::new();
        let count = StripComments::new(input.as_bytes())
            .read_to_string(&mut out)
            .unwrap();
        assert_eq!(count, input.len());
        out
    }

    /// A run of `n` spaces: what a comment of `n` bytes is replaced with.
    ///
    /// Expected values are built from this rather than from literal runs of
    /// spaces so the exact width is explicit and cannot be silently altered.
    fn blanks(n: usize) -> String {
        " ".repeat(n)
    }

    /// Strips `input` with `settings`, which must fail, and returns the error kind.
    fn strip_error(settings: CommentSettings, input: &str) -> ErrorKind {
        let mut out = String::new();
        settings
            .strip_comments(input.as_bytes())
            .read_to_string(&mut out)
            .unwrap_err()
            .kind()
    }

    #[test]
    fn block_comments() {
        let (first, second) = ("/* Comment */", "/** abc */");
        let json = format!("{{{}\"hi\": {} \"bye\"}}", first, second);
        let expected = format!(
            "{{{}\"hi\": {} \"bye\"}}",
            blanks(first.len()),
            blanks(second.len())
        );
        assert_eq!(strip_string(&json), expected);
    }

    #[test]
    fn block_comments_with_possible_end() {
        let (first, second) = ("/* Comment*PossibleEnd */", "/** abc */");
        let json = format!("{{{}\"hi\": {} \"bye\"}}", first, second);
        let expected = format!(
            "{{{}\"hi\": {} \"bye\"}}",
            blanks(first.len()),
            blanks(second.len())
        );
        assert_eq!(strip_string(&json), expected);
    }

    #[test]
    fn doc_comment() {
        let comment = "/** C **/";
        let json = format!("{} {{ \"foo\": 123 }}", comment);
        let expected = format!("{} {{ \"foo\": 123 }}", blanks(comment.len()));
        assert_eq!(strip_string(&json), expected);
    }

    #[test]
    fn line_comments() {
        let (slash, hash) = ("// line comment", "# another");
        let json = format!("{{\n{}\n\"a\": 4,\n{}\n}}", slash, hash);
        // Line comments are blanked up to, but not including, the newline.
        let expected = format!(
            "{{\n{}\n\"a\": 4,\n{}\n}}",
            blanks(slash.len()),
            blanks(hash.len())
        );
        assert_eq!(strip_string(&json), expected);
    }

    #[test]
    fn incomplete_string() {
        let kind = strip_error(CommentSettings::all(), r#""foo"#);
        assert_eq!(kind, ErrorKind::InvalidData);
    }

    #[test]
    fn incomplete_comment() {
        let kind = strip_error(CommentSettings::all(), "/* foo ");
        assert_eq!(kind, ErrorKind::InvalidData);
    }

    #[test]
    fn incomplete_comment2() {
        let kind = strip_error(CommentSettings::all(), "/* foo *");
        assert_eq!(kind, ErrorKind::InvalidData);
    }

    #[test]
    fn no_hash_comments() {
        // With `#` comments disabled the input passes through unchanged.
        let json = "# bad comment\n{\"a\": \"b\"}";
        let mut stripped = String::new();
        CommentSettings::c_style()
            .strip_comments(json.as_bytes())
            .read_to_string(&mut stripped)
            .unwrap();
        assert_eq!(stripped, json);
    }

    #[test]
    fn no_slash_line_comments() {
        let kind = strip_error(
            CommentSettings::hash_only(),
            "// bad comment\n{\"a\": \"b\"}",
        );
        assert_eq!(kind, ErrorKind::InvalidData);
    }

    #[test]
    fn no_block_comments() {
        let kind = strip_error(
            CommentSettings::hash_only(),
            r#"/* bad comment */ {"a": "b"}"#,
        );
        assert_eq!(kind, ErrorKind::InvalidData);
    }
}