use log::debug;
/// Strips C-style comments from `content`.
///
/// * `// ...` line comments are dropped up to, but not including, the newline
///   that ends them.
/// * `/* ... */` block comments are dropped entirely, including any newlines
///   inside them. Nothing is inserted in their place, and they do not nest.
///   An unterminated block comment swallows the rest of the input.
/// * Text inside double-quoted string literals and single-quoted character
///   literals is copied verbatim, honouring backslash escapes, so comment
///   markers inside literals are preserved.
/// * A character literal never extends past the end of its line. An unmatched
///   apostrophe (for example a Rust lifetime such as `'a`) therefore cannot
///   disable comment stripping for the lines that follow it.
///
/// After scanning, trailing whitespace is removed from every line, the lines
/// are re-joined with `\n`, and leading/trailing whitespace of the whole
/// result is trimmed. The original and resulting byte lengths are logged at
/// debug level.
pub(super) fn remove_comments(content: &str) -> String {
    // Position of the scanner relative to comments and quoted literals.
    enum State {
        // Ordinary code.
        Normal,
        // Saw a single '/', which has not been emitted yet.
        MaybeSlash,
        // Inside `// ...`.
        LineComment,
        // Inside `/* ...`.
        BlockComment,
        // Inside a block comment, just after a '*'.
        MaybeEndBlockComment,
        // Inside `"..."`.
        StringLiteral,
        // Just after a backslash inside a string literal.
        StringEscape,
        // Inside `'...'`.
        CharLiteral,
        // Just after a backslash inside a character literal.
        CharEscape,
    }

    let mut result = String::with_capacity(content.len());
    let mut state = State::Normal;

    for c in content.chars() {
        state = match state {
            State::Normal => match c {
                // Hold the slash back until we know whether it opens a comment.
                '/' => State::MaybeSlash,
                '"' => {
                    result.push(c);
                    State::StringLiteral
                }
                '\'' => {
                    result.push(c);
                    State::CharLiteral
                }
                _ => {
                    result.push(c);
                    State::Normal
                }
            },
            State::MaybeSlash => match c {
                '/' => State::LineComment,
                '*' => State::BlockComment,
                _ => {
                    // The pending slash was ordinary code (e.g. a division):
                    // emit it, then handle `c` exactly as `Normal` would.
                    result.push('/');
                    result.push(c);
                    match c {
                        '"' => State::StringLiteral,
                        '\'' => State::CharLiteral,
                        _ => State::Normal,
                    }
                }
            },
            State::LineComment => {
                if c == '\n' {
                    // The newline is not part of the comment; keep it.
                    result.push(c);
                    State::Normal
                } else {
                    State::LineComment
                }
            }
            State::BlockComment => {
                if c == '*' {
                    State::MaybeEndBlockComment
                } else {
                    State::BlockComment
                }
            }
            State::MaybeEndBlockComment => match c {
                '/' => State::Normal,
                // A run of stars: the next character may still close the comment.
                '*' => State::MaybeEndBlockComment,
                _ => State::BlockComment,
            },
            State::StringLiteral => {
                result.push(c);
                match c {
                    '"' => State::Normal,
                    '\\' => State::StringEscape,
                    _ => State::StringLiteral,
                }
            }
            State::StringEscape => {
                // Whatever follows the backslash is literal text.
                result.push(c);
                State::StringLiteral
            }
            State::CharLiteral => {
                result.push(c);
                match c {
                    '\'' => State::Normal,
                    '\\' => State::CharEscape,
                    // A character literal cannot span lines, so the opening
                    // apostrophe was not a literal after all (lifetime, label,
                    // apostrophe in prose). Resume normal scanning so later
                    // comments are still removed.
                    '\n' => State::Normal,
                    _ => State::CharLiteral,
                }
            }
            State::CharEscape => {
                result.push(c);
                State::CharLiteral
            }
        };
    }

    // Input ended right after a lone '/', which is still pending.
    if matches!(state, State::MaybeSlash) {
        result.push('/');
    }

    // Removing comments tends to leave trailing blanks behind; tidy them up.
    let processed_result = result
        .lines()
        .map(str::trim_end)
        .collect::<Vec<&str>>()
        .join("\n")
        .trim()
        .to_string();

    debug!(
        "Comment removal applied. Original len: {}, New len: {}",
        content.len(),
        processed_result.len()
    );
    processed_result
}
/// Drops every line of `content` that is empty or contains only whitespace.
///
/// The surviving lines keep their own leading and trailing whitespace and are
/// joined with a single `\n`; the result has no trailing newline. Line
/// splitting follows `str::lines`, so `\r\n` endings are recognised and come
/// out as `\n`.
pub(super) fn remove_empty_lines(content: &str) -> String {
    let mut kept = String::new();
    for row in content.lines() {
        if row.trim().is_empty() {
            continue;
        }
        // Every kept row is non-empty, so a non-empty buffer means at least
        // one row precedes this one and a separator is needed.
        if !kept.is_empty() {
            kept.push('\n');
        }
        kept.push_str(row);
    }
    kept
}
/// Unit tests for `remove_empty_lines` and `remove_comments`.
///
/// Expected values spell out the exact whitespace the functions produce.
/// In particular, deleting a block comment inserts nothing in its place, so
/// the spaces that surrounded the comment both remain in the output.
#[cfg(test)]
mod tests {
    use super::*;

    // ---------------------------------------------------------------------
    // remove_empty_lines
    // ---------------------------------------------------------------------

    #[test]
    fn test_remove_empty() {
        let input = "Line 1\n\n \nLine 4\n";
        let expected = "Line 1\nLine 4";
        assert_eq!(remove_empty_lines(input), expected);
    }

    #[test]
    fn test_remove_empty_no_empty_lines() {
        let input = "Line 1\nLine 2";
        let expected = "Line 1\nLine 2";
        assert_eq!(remove_empty_lines(input), expected);
    }

    #[test]
    fn test_remove_empty_all_empty() {
        let input = "\n \n\t\n";
        let expected = "";
        assert_eq!(remove_empty_lines(input), expected);
    }

    #[test]
    fn test_remove_empty_trailing_newlines() {
        let input = "Line 1\nLine 2\n\n";
        let expected = "Line 1\nLine 2";
        assert_eq!(remove_empty_lines(input), expected);
    }

    #[test]
    fn test_remove_empty_leading_newlines() {
        let input = "\n\nLine 1\nLine 2";
        let expected = "Line 1\nLine 2";
        assert_eq!(remove_empty_lines(input), expected);
    }

    #[test]
    fn test_remove_empty_windows_newlines() {
        let input = "Line 1\r\n\r\n \r\nLine 4\r\n";
        let expected = "Line 1\nLine 4";
        assert_eq!(remove_empty_lines(input), expected);
    }

    // ---------------------------------------------------------------------
    // remove_comments
    // ---------------------------------------------------------------------

    #[test]
    fn test_remove_line_comment_simple() {
        let input = "code // comment\nmore code";
        // The space before the comment is trailing whitespace and is trimmed.
        let expected = "code\nmore code";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_remove_line_comment_no_newline() {
        let input = "code // comment";
        let expected = "code";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_remove_block_comment_simple() {
        let input = "code /* comment */ more code";
        // Two spaces: one from each side of the removed comment.
        let expected = "code  more code";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_remove_block_comment_multiline() {
        let input = "code /* comment\n more comment */ more code";
        // The newline inside the comment is removed with it.
        let expected = "code  more code";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_remove_block_comment_with_stars() {
        let input = "code /**** comment ****/ more code";
        let expected = "code  more code";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_remove_block_comment_at_end() {
        let input = "code /* comment */";
        let expected = "code";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_comment_markers_in_strings() {
        let input = r#"let s = "// not a comment"; /* also " not start */"#;
        let expected = r#"let s = "// not a comment";"#;
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_comment_markers_in_chars() {
        let input = r#"let c = '/'; // char comment"#;
        let expected = r#"let c = '/';"#;
        assert_eq!(remove_comments(input), expected);

        let input2 = r#"let c = '*'; /* char comment */"#;
        let expected2 = r#"let c = '*';"#;
        assert_eq!(remove_comments(input2), expected2);
    }

    #[test]
    fn test_escaped_quotes_in_strings() {
        let input = r#"let s = "string with \" quote"; // comment"#;
        let expected = r#"let s = "string with \" quote";"#;
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_escaped_slash_before_comment() {
        let input = r#"let path = "\\\\server\\share"; // comment"#;
        let expected = r#"let path = "\\\\server\\share";"#;
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_division_operator() {
        let input = "a = b / c; // divide\nx = y / *p; /* ptr divide */";
        let expected = "a = b / c;\nx = y / *p;";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_empty_input() {
        let input = "";
        let expected = "";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_only_line_comment() {
        let input = "// only comment";
        let expected = "";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_only_line_comment_with_newline() {
        let input = "// only comment\n";
        let expected = "";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_only_block_comment() {
        let input = "/* only comment */";
        let expected = "";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_only_block_comment_multiline() {
        let input = "/* only \n comment */";
        let expected = "";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_block_comment_unterminated() {
        let input = "code /* comment";
        // An unterminated block comment swallows the rest of the input.
        let expected = "code";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_ends_with_slash() {
        let input = "code /";
        let expected = "code /";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_ends_with_star_in_block() {
        let input = "code /* comment *";
        let expected = "code";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_adjacent_block_comments() {
        let input = "code /* first */ /* second */";
        let expected = "code";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_adjacent_line_comments() {
        let input = "code // first \n// second\nend";
        // The second line held only a comment, so an empty line remains.
        let expected = "code\n\nend";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_block_comment_inside_line_comment() {
        let input = "code // line /* block */ comment\nend";
        let expected = "code\nend";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_line_comment_inside_block_comment() {
        let input = "code /* block // line \n comment */ end";
        // Two spaces: one from each side of the removed comment.
        let expected = "code  end";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_string_with_comment_markers_and_escapes() {
        let input =
            r#"str = "/* not comment */ // also not comment \" escaped quote"; // real comment"#;
        let expected = r#"str = "/* not comment */ // also not comment \" escaped quote";"#;
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_char_with_comment_markers_and_escapes() {
        // The raw-string lines start at column 0 on purpose: any leading
        // whitespace would become part of the input.
        let input = r#"
let c1 = '/'; // comment 1
let c2 = '*'; /* comment 2 */
let c3 = '\\'; // comment 3
let c4 = '\''; // comment 4
"#;
        let expected = "let c1 = '/';\nlet c2 = '*';\nlet c3 = '\\\\';\nlet c4 = '\\'';";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_mixed_comments_and_code() {
        // Indentation is written out explicitly so the test does not depend
        // on how this source file happens to be formatted.
        let input = concat!(
            "\n",
            "    int main() { // start main\n",
            "        /* block comment\n",
            "           here */\n",
            "        printf(\"Hello // World\\n\"); /* Print */\n",
            "        // return 0;\n",
            "        return 1; /* Success? */\n",
            "    } // end main",
        );
        // Leading whitespace of the whole result is trimmed; the indentation
        // of later lines is kept, and comment-only lines become empty lines.
        let expected =
            "int main() {\n\n        printf(\"Hello // World\\n\");\n\n        return 1;\n    }";
        assert_eq!(remove_comments(input), expected);
    }

    #[test]
    fn test_tricky_slashes_and_stars() {
        let input = "a = b / *p; // divide by pointer value\n c = d */e; /* incorrect comment? */";
        let expected = "a = b / *p;\n c = d */e;";
        assert_eq!(remove_comments(input), expected);

        let input2 = "a = b / * p * / c; /* comment */";
        let expected2 = "a = b / * p * / c;";
        assert_eq!(remove_comments(input2), expected2);
    }

    #[test]
    fn test_slash_then_quote() {
        let input = r#"x = y / "/"; // divide by string"#;
        let expected = r#"x = y / "/";"#;
        assert_eq!(remove_comments(input), expected);
    }
}