#![cfg(feature = "post_process")]
use std::borrow::Cow;
use std::{cmp, slice};
use crate::Error;
// Marker grammar, expressed as byte pieces handed to `CopyingCursor::try_ws_matches`.
// Whitespace is tolerated between consecutive pieces.
// Remainder of `_blank_!(` once the leading `_b` has been consumed.
const BLANK_START: &[&[u8]] = &[b"lank_", b"!", b"("];
// Terminator expected after the closing `)` of a blank marker has been consumed.
const BLANK_END: &[&[u8]] = &[b";"];
// Remainder of `_comment_!(` once the leading `_c` has been consumed.
const COMMENT_START: &[&[u8]] = &[b"omment_", b"!", b"("];
// Terminator of a comment marker whose string argument has just been skipped.
const COMMENT_END: &[&[u8]] = &[b")", b";"];
// Terminator of an argument-less comment marker whose `)` was already consumed.
const COMMENT_END2: &[&[u8]] = &[b";"];
// Remainder of `#[doc =` once the leading `#` has been consumed.
const DOC_BLOCK_START: &[&[u8]] = &[b"[", b"doc", b"="];
// Closing bracket of a `#[doc = "..."]` attribute.
const DOC_BLOCK_END: &[&[u8]] = &[b"]"];
// Emitted for a comment line that carries no text.
const EMPTY_COMMENT: &str = "//";
// Prefix emitted in front of a non-empty comment line.
const COMMENT: &str = "// ";
// Prefix emitted in front of every doc comment line (the text follows it directly).
const DOC_COMMENT: &str = "///";
// Line endings written to the output, mirroring the one found after the marker.
const LF_STR: &str = "\n";
const CRLF_STR: &str = "\r\n";
// Line-ending bytes recognised by `detect_line_ending`.
const CR: u8 = b'\r';
const LF: u8 = b'\n';
// Lower bound on the initial capacity of the output buffer.
const MIN_BUFF_SIZE: usize = 128;
/// Byte-wise cursor over `source` that copies stretches of the input into `buffer` whenever a
/// marker is replaced.
struct CopyingCursor<'a> {
// Index in `source` of the first byte that has not yet been copied into `buffer`.
start_idx: usize,
// Index in `source` of the byte held in `curr`.
curr_idx: usize,
// The byte most recently read from `iter`.
curr: u8,
// Iterator over the bytes of `source` that follow `curr`.
iter: slice::Iter<'a, u8>,
// The original, unmodified input.
source: &'a str,
// Output under construction; only returned when at least one replacement happened.
buffer: String,
}
impl<'a> CopyingCursor<'a> {
/// Creates a cursor resting on the first byte of `source`.
///
/// Returns `None` when `source` is empty, since there is no byte to rest on.
fn new(source: &'a str) -> Option<Self> {
    let mut iter = source.as_bytes().iter();
    let first = *iter.next()?;
    // Replacements may grow the text, so reserve generously up front.
    let capacity = cmp::max(source.len() * 2, MIN_BUFF_SIZE);
    Some(Self {
        start_idx: 0,
        curr_idx: 0,
        curr: first,
        iter,
        source,
        buffer: String::with_capacity(capacity),
    })
}
/// Advances to the next byte, updating `curr` and `curr_idx`.
///
/// Returns `None`, leaving the cursor untouched, once the input is exhausted.
#[inline]
fn next(&mut self) -> Option<u8> {
    let ch = *self.iter.next()?;
    self.curr = ch;
    self.curr_idx += 1;
    Some(ch)
}
/// Appends `source[start_idx..marker]` to the output buffer (nothing when that range is
/// empty) and then moves `start_idx` to `new_start_idx`.
#[inline]
fn copy_to_marker(&mut self, marker: usize, new_start_idx: usize) {
    let from = self.start_idx;
    if from < marker {
        // `marker` is exclusive.
        let pending = &self.source[from..marker];
        self.buffer.push_str(pending);
    }
    self.start_idx = new_start_idx;
}
/// Consumes the cursor and yields the final text.
///
/// When nothing was ever copied (`start_idx` is still zero) the original input is returned
/// borrowed; otherwise the remaining tail is flushed and the owned buffer is returned.
fn into_buffer(mut self) -> Cow<'a, str> {
    if self.start_idx == 0 {
        return Cow::Borrowed(self.source);
    }
    // Flush everything up to and including the current byte.
    let end = self.curr_idx + 1;
    self.copy_to_marker(end, end);
    self.buffer.shrink_to_fit();
    Cow::Owned(self.buffer)
}
/// Skips a (possibly nested) block comment whose opening `/*` has already been consumed.
///
/// On return the cursor rests on the `/` of the matching `*/`, or on the last byte of the
/// input when the comment is unterminated.
fn skip_block_comment(&mut self) {
    #[derive(Clone, Copy)]
    enum State {
        InComment,
        // The previous byte was a `/` that may begin a nested `/*`.
        MaybeStarting,
        // The previous byte was a `*` that may begin a closing `*/`.
        MaybeEnding,
    }
    let mut nest_level = 1;
    let mut state = State::InComment;
    while let Some(ch) = self.next() {
        state = match (ch, state) {
            (b'/', State::MaybeEnding) => {
                nest_level -= 1;
                if nest_level == 0 {
                    break;
                }
                State::InComment
            }
            (b'*', State::MaybeStarting) => {
                nest_level += 1;
                State::InComment
            }
            // Any other `*` may still begin `*/`, including one that follows another `*`
            // (e.g. the common `**/` terminator).
            (b'*', _) => State::MaybeEnding,
            // Any other `/` may still begin `/*`, including one that follows another `/`.
            (b'/', _) => State::MaybeStarting,
            (_, _) => State::InComment,
        };
    }
}
/// Attempts to skip a comment, assuming the cursor rests on a `/`.
///
/// Consumes one more byte: a second `/` skips to the end of the line (or input), a `*` skips
/// a block comment. Returns `false` when the byte after the `/` starts neither kind of
/// comment (or there is no such byte).
fn try_skip_comment(&mut self) -> bool {
    let second = self.next();
    if second == Some(b'/') {
        // Line comment: run to the newline or the end of the input.
        loop {
            match self.next() {
                Some(b'\n') | None => break,
                Some(_) => {}
            }
        }
        return true;
    }
    if second == Some(b'*') {
        self.skip_block_comment();
        return true;
    }
    false
}
/// Skips a regular string literal whose opening `"` has already been consumed.
///
/// Stops on the first unescaped `"`, or at the end of the input.
fn skip_string(&mut self) {
    let mut escaped = false;
    while let Some(ch) = self.next() {
        if escaped {
            // Whatever follows a backslash is part of the escape.
            escaped = false;
            continue;
        }
        match ch {
            b'"' => return,
            b'\\' => escaped = true,
            _ => {}
        }
    }
}
/// Attempts to skip a raw string literal (`r"..."`, `r#"..."#`, ...), assuming the cursor
/// rests on the leading `r`.
///
/// Returns `false` as soon as the bytes after the `r` cannot open a raw string; the cursor
/// is then left on the last byte it read. Returns `true` once the closing delimiter has been
/// consumed, or when the input ends first.
fn try_skip_raw_string(&mut self) -> bool {
    // Consume the opening delimiter and count its `#` pads.
    let pads = match self.next() {
        Some(b'#') => {
            let mut pads = 1;
            while let Some(ch) = self.next() {
                match ch {
                    b'#' => pads += 1,
                    b'"' => break,
                    // e.g. a raw identifier such as `r#test`
                    _ => return false,
                }
            }
            pads
        }
        Some(b'"') => 0,
        _ => return false,
    };
    #[derive(Clone, Copy)]
    enum State {
        InRawComment,
        // A `"` followed by this many `#` has been seen so far.
        MaybeEndingComment(i32),
    }
    let mut state = State::InRawComment;
    while let Some(ch) = self.next() {
        match (ch, state) {
            (b'"', State::InRawComment) if pads == 0 => break,
            // Every quote (re)starts a candidate terminator, even one that interrupts a
            // previous candidate: the content of `r#"a""#` ends in a quote.
            (b'"', _) => state = State::MaybeEndingComment(0),
            (b'#', State::MaybeEndingComment(pads_seen)) => {
                let pads_seen = pads_seen + 1;
                if pads_seen == pads {
                    break;
                }
                state = State::MaybeEndingComment(pads_seen);
            }
            (_, _) => state = State::InRawComment,
        }
    }
    true
}
#[inline]
fn skip_blank_param(&mut self) -> Result<(), Error> {
while let Some(ch) = self.next() {
if ch == b')' {
return Ok(());
}
}
Err(Error::BadSourceCode("Unexpected end of input".to_string()))
}
fn try_skip_string(&mut self) -> Result<Option<u8>, Error> {
while let Some(ch) = self.next() {
if Self::is_whitespace(ch) {
continue;
}
return match ch {
b'"' => {
self.skip_string();
Ok(None)
}
b'r' => {
if self.try_skip_raw_string() {
Ok(None)
} else {
Err(Error::BadSourceCode("Bad raw string".to_string()))
}
}
ch => Ok(Some(ch)),
};
}
Err(Error::BadSourceCode("Unexpected end of input".to_string()))
}
/// Reports whether `ch` is a space, line feed, carriage return, horizontal tab, vertical tab
/// or form feed.
#[inline]
fn is_whitespace(ch: u8) -> bool {
    match ch {
        b' ' | b'\t' | b'\n' | b'\r' | 0x0b | 0x0c => true,
        _ => false,
    }
}
/// Matches each piece of `slices` in order against the upcoming bytes.
///
/// Whitespace may precede every piece except the first, where it is only accepted when
/// `allow_whitespace_first` is set. Returns `false` on the first mismatch or when the input
/// runs out; on success the cursor rests on the last byte of the last piece.
///
/// # Panics
///
/// Panics if any piece is empty.
fn try_ws_matches(&mut self, slices: &[&[u8]], allow_whitespace_first: bool) -> bool {
    let mut ws_allowed = allow_whitespace_first;
    for piece in slices {
        let head = piece[0];
        let tail = &piece[1..];
        // Locate the first byte of the piece, stepping over permitted whitespace.
        loop {
            match self.next() {
                Some(ch) if ch == head => break,
                Some(ch) if ws_allowed && Self::is_whitespace(ch) => {}
                _ => return false,
            }
        }
        // The rest of the piece must follow immediately.
        if !tail.is_empty() && !self.try_match(tail) {
            return false;
        }
        ws_allowed = true;
    }
    true
}
/// Consumes bytes while they equal the bytes of `sl`, in order.
///
/// Returns `true` when all of `sl` matched; stops at the first mismatching byte (which is
/// consumed) or at the end of the input and returns `false`.
fn try_match(&mut self, sl: &[u8]) -> bool {
    sl.iter().all(|&expected| self.next() == Some(expected))
}
#[inline]
fn detect_line_ending(&mut self) -> Option<&'static str> {
match self.next() {
Some(CR) => match self.next() {
Some(LF) => Some(CRLF_STR),
_ => None,
},
Some(LF) => Some(LF_STR),
_ => None,
}
}
/// Appends `spaces` space characters to `buffer`.
#[inline]
fn push_spaces(spaces: usize, buffer: &mut String) {
    buffer.extend(std::iter::repeat(' ').take(spaces));
}
/// Writes the blank lines requested by a blank marker.
///
/// An empty `num` yields a single `ending`; otherwise `num` is parsed as an integer literal
/// and `ending` is written that many times. `_spaces` is ignored.
///
/// # Errors
///
/// Fails when `num` is non-empty and does not parse as an integer literal fitting a `u32`.
fn process_blanks(
    _spaces: usize,
    buffer: &mut String,
    num: &str,
    ending: &str,
) -> Result<(), Error> {
    let count = if num.is_empty() {
        1
    } else {
        let literal = syn::parse_str::<syn::LitInt>(num)?;
        literal.base10_parse::<u32>()?
    };
    (0..count).for_each(|_| buffer.push_str(ending));
    Ok(())
}
/// Writes the line comments requested by a comment marker.
///
/// `s` is the marker argument as it appears in the source: either empty or a string literal.
/// Each line of the literal's value becomes one `// ` comment indented by `spaces`; an empty
/// argument, an empty value or an empty line becomes a bare `//`.
///
/// # Errors
///
/// Fails when `s` is non-empty and does not parse as a string literal.
fn process_comments(
    spaces: usize,
    buffer: &mut String,
    s: &str,
    ending: &str,
) -> Result<(), Error> {
    let text = if s.is_empty() {
        String::new()
    } else {
        syn::parse_str::<syn::LitStr>(s)?.value()
    };
    if text.is_empty() {
        Self::push_spaces(spaces, buffer);
        buffer.push_str(EMPTY_COMMENT);
        buffer.push_str(ending);
        return Ok(());
    }
    for line in text.lines() {
        Self::push_spaces(spaces, buffer);
        if !line.is_empty() {
            buffer.push_str(COMMENT);
            buffer.push_str(line);
        } else {
            buffer.push_str(EMPTY_COMMENT);
        }
        buffer.push_str(ending);
    }
    Ok(())
}
/// Writes the `///` doc comments that replace a `#[doc = "..."]` attribute.
///
/// `s` is the attribute value as it appears in the source: either empty or a string literal.
/// Each line of the literal's value is written directly after `///`, indented by `spaces`;
/// an empty argument or an empty value yields a single bare `///`.
///
/// # Errors
///
/// Fails when `s` is non-empty and does not parse as a string literal.
fn process_doc_block(
    spaces: usize,
    buffer: &mut String,
    s: &str,
    ending: &str,
) -> Result<(), Error> {
    let text = if s.is_empty() {
        String::new()
    } else {
        syn::parse_str::<syn::LitStr>(s)?.value()
    };
    if text.is_empty() {
        Self::push_spaces(spaces, buffer);
        buffer.push_str(DOC_COMMENT);
        buffer.push_str(ending);
        return Ok(());
    }
    for line in text.lines() {
        Self::push_spaces(spaces, buffer);
        buffer.push_str(DOC_COMMENT);
        buffer.push_str(line);
        buffer.push_str(ending);
    }
    Ok(())
}
/// Tries to match the remainder of a marker's opening sequence.
///
/// The caller has already consumed `chars_matched` bytes of the marker, preceded by `indent`
/// spaces, and the cursor still rests on the last of those bytes. On success returns the
/// index where the indentation starts and the index of the first byte after the prefix.
fn try_match_prefixes(
    &mut self,
    indent: usize,
    chars_matched: usize,
    prefixes: &[&[u8]],
    allow_ws_first: bool,
) -> Option<(usize, usize)> {
    // Must be taken before matching moves the cursor. The cursor is on the last matched
    // byte, hence the `- 1`.
    let already_consumed = (chars_matched + indent) - 1;
    let mark_start_ident = self.curr_idx - already_consumed;
    if !self.try_ws_matches(prefixes, allow_ws_first) {
        return None;
    }
    Some((mark_start_ident, self.curr_idx + 1))
}
fn try_replace<F>(
&mut self,
spaces: usize,
chars_matched: usize,
suffixes: &[&[u8]],
mark_start_ident: usize,
mark_start_value: usize,
f: F,
) -> Result<(), Error>
where
F: FnOnce(usize, &mut String, &str, &str) -> Result<(), Error>,
{
let mark_end_value = self.curr_idx + (1 - chars_matched);
if !self.try_ws_matches(suffixes, true) {
return Err(Error::BadSourceCode(
"Unable to match suffix on doc block or marker.".to_string(),
));
}
if let Some(ending) = self.detect_line_ending() {
let mark_end_ident = self.curr_idx + 1;
self.copy_to_marker(mark_start_ident, mark_end_ident);
f(
spaces,
&mut self.buffer,
&self.source[mark_start_value..mark_end_value],
ending,
)?;
Ok(())
} else {
Err(Error::BadSourceCode("Expected CR or LF".to_string()))
}
}
fn try_replace_blank_marker(&mut self, spaces: usize) -> Result<bool, Error> {
match self.try_match_prefixes(spaces, 2, BLANK_START, false) {
Some((ident_start, value_start)) => {
self.skip_blank_param()?;
self.try_replace(
spaces,
1,
BLANK_END,
ident_start,
value_start,
CopyingCursor::process_blanks,
)?;
Ok(true)
}
None => Ok(false),
}
}
fn try_replace_comment_marker(&mut self, spaces: usize) -> Result<bool, Error> {
match self.try_match_prefixes(spaces, 2, COMMENT_START, false) {
Some((ident_start, value_start)) => {
let (matched, suffix) = match self.try_skip_string()? {
None => (0, COMMENT_END),
Some(b')') => (1, COMMENT_END2),
Some(ch) => {
return Err(Error::BadSourceCode(format!(
"Expected ')' or string, but got: {}",
ch as char
)))
}
};
self.try_replace(
spaces,
matched,
suffix,
ident_start,
value_start,
CopyingCursor::process_comments,
)?;
Ok(true)
}
None => Ok(false),
}
}
fn try_replace_doc_block(&mut self, spaces: usize) -> Result<bool, Error> {
match self.try_match_prefixes(spaces, 1, DOC_BLOCK_START, true) {
Some((ident_start, value_start)) => {
match self.try_skip_string()? {
None => {
self.try_replace(
spaces,
0,
DOC_BLOCK_END,
ident_start,
value_start,
CopyingCursor::process_doc_block,
)?;
Ok(true)
}
Some(ch) => Err(Error::BadSourceCode(format!(
"Expected string, but got: {}",
ch as char
))),
}
}
None => Ok(false),
}
}
}
/// Replaces `_blank_!(...)` and `_comment_!(...)` markers in `s` with blank lines and line
/// comments and, when `replace_doc_blocks` is set, `#[doc = "..."]` attributes with `///`
/// doc comments.
///
/// Strings, raw strings and comments are skipped so that markers inside them are left alone.
/// The input is returned borrowed when nothing was replaced.
///
/// # Errors
///
/// Returns an error when a marker or doc attribute starts correctly but is malformed.
pub(crate) fn replace_markers(s: &str, replace_doc_blocks: bool) -> Result<Cow<str>, Error> {
    let mut cursor = match CopyingCursor::new(s) {
        Some(cursor) => cursor,
        // Empty input
        None => return Ok(Cow::Borrowed(s)),
    };
    // Number of consecutive spaces directly in front of the current byte.
    let mut indent: usize = 0;
    loop {
        let dispatch_idx = cursor.curr_idx;
        let on_space = cursor.curr == b' ';
        // `true`: the current byte has been dealt with, so step to the next one.
        // `false`: a helper stopped on a byte that still has to be examined.
        let handled = match cursor.curr {
            // Possible raw string
            b'r' => cursor.try_skip_raw_string(),
            // Regular string
            b'"' => {
                cursor.skip_string();
                true
            }
            // Possible comment
            b'/' => cursor.try_skip_comment(),
            // Possible marker (`_blank_!` or `_comment_!`)
            b'_' => {
                if cursor.next().is_none() {
                    break;
                }
                match cursor.curr {
                    b'b' => cursor.try_replace_blank_marker(indent)?,
                    b'c' => cursor.try_replace_comment_marker(indent)?,
                    _ => false,
                }
            }
            // Possible doc attribute
            b'#' if replace_doc_blocks => cursor.try_replace_doc_block(indent)?,
            _ => true,
        };
        // Only an unbroken run of spaces counts as indentation.
        indent = if on_space { indent + 1 } else { 0 };
        if handled {
            if cursor.next().is_none() {
                break;
            }
        } else if cursor.curr_idx == dispatch_idx {
            // The helper could not read a single byte: the input ended on this byte.
            // Re-dispatching it would loop forever.
            break;
        }
    }
    Ok(cursor.into_buffer())
}
#[cfg(test)]
mod tests {
use std::borrow::Cow;
use pretty_assertions::assert_eq;
use crate::replace::replace_markers;
use crate::Error;
/// Empty input comes back unchanged and borrowed.
#[test]
fn blank() {
    let output = replace_markers("", false).unwrap();
    assert_eq!("", output);
    assert!(matches!(output, Cow::Borrowed(_)));
}
// Source without any well-formed marker: markers inside comments and the malformed
// `_blank!_` must be left alone, and the input must come back borrowed.
#[test]
fn no_replacements() {
let source = r####"// _comment!_("comment");
/* /* nested comment */ */
/// This is a main function
fn main() {
println!("hello world");
println!(r##"hello raw world!"##);
}
_blank!_;
"####;
let actual = replace_markers(source, false).unwrap();
let expected = source;
assert_eq!(expected, actual);
assert!(matches!(actual, Cow::Borrowed(_)));
}
// Comment markers with regular, raw, empty and missing arguments, including one spread over
// several lines, are turned into `//` comments; everything else is preserved.
#[test]
fn replace_comments() {
let source = r####"// _comment!_("comment");
/* /* nested comment */ */
_comment_!("comment 1\n\ncomment 2");
_comment_!("test");
_comment!("skip this");
/// This is a main function
fn main() {
println!(r##"hello raw world!"##);
_comment_!(r"");
_comment_!();
println!("hello \nworld");
}
_comment_ !
( r#"This is two
comments"# )
;
_blank!_;
"####;
let actual = replace_markers(source, false).unwrap();
let expected = r####"// _comment!_("comment");
/* /* nested comment */ */
// comment 1
//
// comment 2
// test
_comment!("skip this");
/// This is a main function
fn main() {
println!(r##"hello raw world!"##);
//
//
println!("hello \nworld");
}
// This is two
// comments
_blank!_;
"####;
assert_eq!(expected, actual);
}
// Blank markers with a count, without a count, and spread over several lines are checked
// against the expected output; doc attributes stay untouched because doc block replacement
// is disabled.
#[test]
fn replace_blanks() {
let source = r####"// _blank!_(5);
/* /* nested comment */ */
_blank_!(2);
_blank!_("skip this");
#[doc = "This is a main function"]
fn main() {
let r#test = "hello";
println!(r"hello raw world!");
_blank_!();
println!("hello \nworld");
}
_blank_
!(
2
);
_blank!_;
"####;
let actual = replace_markers(source, false).unwrap();
let expected = r####"// _blank!_(5);
/* /* nested comment */ */
_blank!_("skip this");
#[doc = "This is a main function"]
fn main() {
let r#test = "hello";
println!(r"hello raw world!");
println!("hello \nworld");
}
_blank!_;
"####;
assert_eq!(expected, actual);
}
// With doc block replacement enabled, `#[doc = "..."]` attributes (single-line, multi-line,
// empty, and spread over several lines) become `///` comments, while other attributes are
// preserved.
#[test]
fn replace_doc_blocks() {
let source = r####"// _blank!_(5);
/* not a nested comment */
#[doc = r#" This is a main function"#]
#[doc = r#" This is two doc
comments"#]
#[cfg(feature = "main")]
#[doc(hidden)]
fn main() {
println!(r##"hello raw world!"##);
#[doc = ""]
println!("hello \nworld");
}
# [
doc
=
" this is\n\n three doc comments"
]
fn test() {
}
_blank!_;
"####;
let actual = replace_markers(source, true).unwrap();
let expected = r####"// _blank!_(5);
/* not a nested comment */
/// This is a main function
/// This is two doc
/// comments
#[cfg(feature = "main")]
#[doc(hidden)]
fn main() {
println!(r##"hello raw world!"##);
///
println!("hello \nworld");
}
/// this is
///
/// three doc comments
fn test() {
}
_blank!_;
"####;
assert_eq!(expected, actual);
}
/// The replacement reuses the CRLF line ending that follows the marker.
#[test]
fn replace_crlf() {
    let output = replace_markers("_blank_!(2);\r\n", false).unwrap();
    assert_eq!("\r\n\r\n", output);
}
/// Input that ends right after a marker's opening parenthesis is rejected.
#[test]
fn marker_end_after_prefix() {
    let result = replace_markers("_blank_!(", false);
    assert!(matches!(result, Err(Error::BadSourceCode(_))));
}
/// A comment marker whose argument is not a string literal is rejected.
#[test]
fn marker_param_not_string() {
    let result = replace_markers("_comment_!(blah);\n", false);
    assert!(matches!(result, Err(Error::BadSourceCode(_))));
}
/// A comment marker closed by `]` instead of `)` is rejected.
#[test]
fn marker_bad_suffix() {
    let result = replace_markers("_comment_!(\"blah\"];\n", false);
    assert!(matches!(result, Err(Error::BadSourceCode(_))));
}
/// A doc attribute whose string literal is never closed is rejected.
#[test]
fn doc_block_string_not_closed() {
    let result = replace_markers("#[doc = \"test]\n", true);
    assert!(matches!(result, Err(Error::BadSourceCode(_))));
}
}