// cargo-cheers 0.1.0-alpha.1
//
// Cargo subcommand for Cheers development tooling.
use std::ops::Range;

use crop::Rope;
use proc_macro2::{LineColumn, Span, extra::DelimSpan};
use rustc_lexer::{TokenKind, tokenize};
use syn::spanned::Spanned as _;

/// A single source comment.
#[derive(Clone, Debug)]
pub(crate) struct Comment {
    /// The comment text exactly as it was sliced out of the source by
    /// `Trivia::new` (the full text of the comment token).
    pub raw: String,
}

/// A [`Comment`] together with the byte range it was found at.
#[derive(Clone, Debug)]
pub(crate) struct LocatedComment {
    /// Byte offset of the first byte of the comment.
    pub start: usize,
    /// Byte offset one past the last byte of the comment.
    pub end: usize,
    /// The comment itself.
    pub comment: Comment,
}

/// Internal bookkeeping record for one comment tracked by [`Trivia`].
#[derive(Clone, Debug)]
struct CommentEntry {
    /// Byte offset of the first byte of the comment.
    start: usize,
    /// Byte offset one past the last byte of the comment.
    end: usize,
    /// The comment text.
    comment: Comment,
    /// Set to `true` once the comment has been taken or explicitly consumed;
    /// the `take_*` methods skip entries with this flag set.
    consumed: bool,
}

/// The comments found in a region of source text, with per-comment tracking
/// of whether each one has already been consumed.
#[derive(Clone, Debug)]
pub(crate) struct Trivia {
    /// Comment entries in the order `Trivia::new` encountered them while
    /// lexing.
    comments: Vec<CommentEntry>,
}

impl Trivia {
    /// Lexes the bytes of `source` inside `range` and records every line and
    /// block comment token found there.
    ///
    /// Recorded offsets are absolute byte offsets into `source`. Entries are
    /// pushed in token order, so `comments` is sorted by position and its
    /// entries never overlap; the lookup methods below rely on that.
    pub fn new(source: &Rope, range: Range<usize>) -> Self {
        let base = range.start;
        let text = source.byte_slice(range).to_string();
        let mut comments = Vec::new();
        // Offset of the current token relative to the start of `text`.
        let mut rel_start = 0usize;

        for token in tokenize(&text) {
            let rel_end = rel_start + token.len;

            if matches!(
                token.kind,
                TokenKind::LineComment | TokenKind::BlockComment { .. }
            ) {
                comments.push(CommentEntry {
                    start: base + rel_start,
                    end: base + rel_end,
                    comment: Comment {
                        raw: text[rel_start..rel_end].to_string(),
                    },
                    consumed: false,
                });
            }

            rel_start = rel_end;
        }

        Self { comments }
    }

    /// Converts a line/column position into a byte offset into `source`.
    ///
    /// `point.line` is treated as 1-based and `point.column` as a count of
    /// characters from the start of the line. Returns `None` for line `0` and
    /// for lines beyond one past the last line. The position "one line past
    /// the last line, column 0" maps to `source.byte_len()`; any other column
    /// on that line yields `None`. A column larger than the number of
    /// characters the line yields resolves to the end of those characters.
    pub fn line_column_to_byte(source: &Rope, point: LineColumn) -> Option<usize> {
        let line_count = source.line_len();
        if point.line == 0 || point.line > line_count + 1 {
            return None;
        }
        if point.line == line_count + 1 {
            return (point.column == 0).then_some(source.byte_len());
        }

        let line_idx = point.line - 1;
        let column_bytes: usize = source
            .line(line_idx)
            .chars()
            .take(point.column)
            .map(char::len_utf8)
            .sum();
        Some(source.byte_of_line(line_idx) + column_bytes)
    }

    /// Converts `span` into a byte range, or `None` if either endpoint cannot
    /// be resolved by [`Self::line_column_to_byte`].
    pub fn span_range(source: &Rope, span: Span) -> Option<Range<usize>> {
        let start = Self::line_column_to_byte(source, span.start())?;
        let end = Self::line_column_to_byte(source, span.end())?;
        Some(start..end)
    }

    /// Byte range of the span returned by `span.span()`.
    pub fn delim_range(source: &Rope, span: DelimSpan) -> Option<Range<usize>> {
        Self::span_range(source, span.span())
    }

    /// Byte range running from the end of the opening delimiter to the start
    /// of the closing delimiter.
    pub fn delim_inner_range(source: &Rope, span: DelimSpan) -> Option<Range<usize>> {
        let start = Self::line_column_to_byte(source, span.open().end())?;
        let end = Self::line_column_to_byte(source, span.close().start())?;
        Some(start..end)
    }

    /// Returns `true` if any recorded comment lies entirely inside `range`,
    /// whether or not it has already been consumed.
    pub fn has_comments_in_range(&self, range: Range<usize>) -> bool {
        self.comments
            .iter()
            .any(|comment| Self::encloses(&range, comment))
    }

    /// Like [`Self::has_comments_in_range`] for the byte range of `span`.
    /// Returns `false` when the span cannot be resolved.
    pub fn has_comments_in_span(&self, source: &Rope, span: Span) -> bool {
        Self::span_range(source, span).is_some_and(|range| self.has_comments_in_range(range))
    }

    /// Like [`Self::has_comments_in_range`] for the range given by
    /// [`Self::delim_range`]. Returns `false` when the span cannot be
    /// resolved.
    pub fn has_comments_in_delim(&self, source: &Rope, span: DelimSpan) -> bool {
        Self::delim_range(source, span).is_some_and(|range| self.has_comments_in_range(range))
    }

    /// Marks every comment inside `span` as consumed. Does nothing when the
    /// span cannot be resolved.
    pub fn consume_comments_in_span(&mut self, source: &Rope, span: Span) {
        if let Some(range) = Self::span_range(source, span) {
            self.consume_comments_in_range(range);
        }
    }

    /// Marks every comment lying entirely inside `range` as consumed.
    pub fn consume_comments_in_range(&mut self, range: Range<usize>) {
        for comment in &mut self.comments {
            if Self::encloses(&range, comment) {
                comment.consumed = true;
            }
        }
    }

    /// Returns `true` if `range` contains a blank line outside of comments.
    ///
    /// `range` is first clamped to the length of `source`. The text between
    /// recorded comments is scanned with `range_has_blank_line`; the comments
    /// themselves are skipped, so newlines inside a comment never count.
    pub fn has_blank_line_in_range(&self, source: &Rope, range: Range<usize>) -> bool {
        let source_len = source.byte_len();
        let range = range.start.min(source_len)..range.end.min(source_len);

        if range.start >= range.end {
            return false;
        }

        let mut cursor = range.start;
        let mut after_newline_only_whitespace = false;

        let overlapping = self
            .comments
            .iter()
            .filter(|comment| comment.start < range.end && range.start < comment.end);

        for comment in overlapping {
            // Clip the comment to the scanned range.
            let comment_start = comment.start.max(range.start);
            let comment_end = comment.end.min(range.end);

            if cursor < comment_start
                && range_has_blank_line(
                    source,
                    cursor..comment_start,
                    &mut after_newline_only_whitespace,
                )
            {
                return true;
            }

            // A comment is content: the line it sits on is not blank.
            after_newline_only_whitespace = false;
            cursor = cursor.max(comment_end);
        }

        cursor < range.end
            && range_has_blank_line(
                source,
                cursor..range.end,
                &mut after_newline_only_whitespace,
            )
    }

    /// Same as [`Self::take_leading_located_comments`], without the
    /// positions.
    pub fn take_leading_comments(&mut self, source: &Rope, loc: LineColumn) -> Vec<Comment> {
        self.take_leading_located_comments(source, loc)
            .into_iter()
            .map(|comment| comment.comment)
            .collect()
    }

    /// Takes the run of unconsumed comments immediately preceding `loc`.
    ///
    /// Walking backwards from `loc`, a comment joins the run while it is
    /// unconsumed and only whitespace separates it from the previous member
    /// of the run (or from `loc` itself). The taken comments are marked
    /// consumed and returned in source order. Returns an empty vector when
    /// `loc` cannot be resolved.
    pub fn take_leading_located_comments(
        &mut self,
        source: &Rope,
        loc: LineColumn,
    ) -> Vec<LocatedComment> {
        let Some(loc_byte) = Self::line_column_to_byte(source, loc) else {
            return Vec::new();
        };

        // `comments` is sorted and non-overlapping, so the comments ending at
        // or before `loc` form a prefix of the list.
        let run_end = self
            .comments
            .partition_point(|comment| comment.end <= loc_byte);

        // Only the nearest preceding comment can ever be separated from the
        // cursor by pure whitespace (anything farther away has that comment's
        // text in between), so a single backward walk finds the whole run.
        let mut run_start = run_end;
        let mut cursor = loc_byte;
        while run_start > 0 {
            let candidate = &self.comments[run_start - 1];
            if candidate.consumed || !is_whitespace_between(source, candidate.end, cursor) {
                break;
            }
            cursor = candidate.start;
            run_start -= 1;
        }

        self.comments[run_start..run_end]
            .iter_mut()
            .map(Self::claim)
            .collect()
    }

    /// Takes the unconsumed comment that starts at or after `loc` on the same
    /// line as `loc`, with nothing but whitespace in between.
    ///
    /// The comment is marked consumed. Returns `None` when `loc` cannot be
    /// resolved or no such comment exists.
    pub fn take_trailing_comment(&mut self, source: &Rope, loc: LineColumn) -> Option<Comment> {
        let loc_byte = Self::line_column_to_byte(source, loc)?;

        // Only the first comment at or after `loc` can qualify: any later one
        // has that comment's text between it and `loc`.
        let next = self
            .comments
            .partition_point(|comment| comment.start < loc_byte);
        let candidate = self.comments.get_mut(next)?;

        let trails_loc = !candidate.consumed
            && source.line_of_byte(candidate.start) == source.line_of_byte(loc_byte)
            && is_whitespace_between(source, loc_byte, candidate.start);
        if !trails_loc {
            return None;
        }

        Some(Self::claim(candidate).comment)
    }

    /// Same as [`Self::take_located_comments_in_delim`], without the
    /// positions.
    pub fn take_comments_in_delim(&mut self, source: &Rope, span: DelimSpan) -> Vec<Comment> {
        self.take_located_comments_in_delim(source, span)
            .into_iter()
            .map(|comment| comment.comment)
            .collect()
    }

    /// Takes every unconsumed comment between the delimiters of `span` (see
    /// [`Self::delim_inner_range`]). Returns an empty vector when the span
    /// cannot be resolved.
    pub fn take_located_comments_in_delim(
        &mut self,
        source: &Rope,
        span: DelimSpan,
    ) -> Vec<LocatedComment> {
        let Some(range) = Self::delim_inner_range(source, span) else {
            return Vec::new();
        };

        self.take_located_comments_in_range(range)
    }

    /// Same as [`Self::take_located_comments_in_range`], without the
    /// positions.
    pub fn take_comments_in_range(&mut self, range: Range<usize>) -> Vec<Comment> {
        self.take_located_comments_in_range(range)
            .into_iter()
            .map(|comment| comment.comment)
            .collect()
    }

    /// Takes every unconsumed comment lying entirely inside `range`, marking
    /// each as consumed. Comments are returned in stored order.
    pub fn take_located_comments_in_range(&mut self, range: Range<usize>) -> Vec<LocatedComment> {
        self.comments
            .iter_mut()
            .filter(|entry| !entry.consumed && Self::encloses(&range, entry))
            .map(Self::claim)
            .collect()
    }

    /// Returns `true` when `comment` lies entirely inside `range`.
    fn encloses(range: &Range<usize>, comment: &CommentEntry) -> bool {
        range.start <= comment.start && comment.end <= range.end
    }

    /// Marks `entry` as consumed and returns a located copy of its comment.
    fn claim(entry: &mut CommentEntry) -> LocatedComment {
        entry.consumed = true;
        LocatedComment {
            start: entry.start,
            end: entry.end,
            comment: entry.comment.clone(),
        }
    }
}

/// Scans the characters of `source` inside `range` for a blank line.
///
/// `after_newline_only_whitespace` carries state across calls: it is `true`
/// when the most recent newline has been followed by nothing but whitespace.
/// A second newline seen in that state means a blank line, and the function
/// returns `true` immediately, leaving the flag set. Any non-whitespace
/// character clears the flag.
fn range_has_blank_line(
    source: &Rope,
    range: Range<usize>,
    after_newline_only_whitespace: &mut bool,
) -> bool {
    source.byte_slice(range).chars().any(|ch| match ch {
        // Two newlines with only whitespace between them: a blank line.
        '\n' if *after_newline_only_whitespace => true,
        '\n' => {
            *after_newline_only_whitespace = true;
            false
        }
        other => {
            if !other.is_whitespace() {
                *after_newline_only_whitespace = false;
            }
            false
        }
    })
}

/// Returns `true` when `start <= end` and every character of `source` in
/// `start..end` is whitespace. An empty range therefore counts as whitespace.
fn is_whitespace_between(source: &Rope, start: usize, end: usize) -> bool {
    if start > end {
        return false;
    }

    let gap = source.byte_slice(start..end);
    !gap.chars().any(|ch| !ch.is_whitespace())
}

#[cfg(test)]
mod test {
    use crop::Rope;

    use super::Trivia;

    /// A very long run of spaces on a single line contains no newline, so it
    /// must not be reported as a blank line.
    #[test]
    fn long_same_line_gap_is_scanned_without_blank_line() {
        const GAP_WIDTH: usize = 100_000;

        let padding = " ".repeat(GAP_WIDTH);
        let source = Rope::from(format!("a{padding}b").as_str());
        let total_bytes = source.byte_len();
        let trivia = Trivia::new(&source, 0..total_bytes);

        // Scan only the gap between the two letters.
        let gap = 1..(total_bytes - 1);
        assert!(!trivia.has_blank_line_in_range(&source, gap));
    }
}