tinymist-analysis 0.14.14

Typst Static Analyzers for Tinymist.
//! Convenient utilities to match comments in code.

use itertools::Itertools;

use crate::prelude::*;

/// Extracts the module-level documentation from a source.
///
/// Only the leading run of trivia children of the root is inspected: the
/// scanned range starts at offset `0` and ends at the first non-trivia child,
/// or at the end of the text when the source holds nothing but trivia.
///
/// Returns `None` when no documentation comment is found in that range.
pub fn find_module_level_docs(src: &Source) -> Option<String> {
    crate::log_debug_ct!("finding docs at: {id:?}", id = src.id());

    let root = LinkedNode::new(src.root());

    // The documentation region stops where the first non-trivia child begins.
    let docs_end = root
        .children()
        .find(|child| !child.kind().is_trivia())
        .map_or_else(|| src.text().len(), |child| child.offset());

    extract_mod_docs_between(&root, 0..docs_end, true)
}

/// Collects documentation comments among the children of `node` whose start
/// offset lies inside `rng`.
///
/// Comments are matched strictly (see [`DocCommentMatcher`]). When
/// `first_group` is `true`, scanning stops at the first group break and the
/// comments gathered so far are returned. Otherwise every group break discards
/// the buffered comments, so only the comments after the last break in the
/// range remain.
///
/// Returns `None` when no comment is left to collect.
fn extract_mod_docs_between(
    node: &LinkedNode,
    rng: Range<usize>,
    first_group: bool,
) -> Option<String> {
    let mut matcher = DocCommentMatcher {
        strict: true,
        ..Default::default()
    };

    // Drop children starting before the range, stop at the first one starting
    // at or past its end.
    let in_range = node
        .children()
        .filter(|child| child.offset() >= rng.start)
        .take_while(|child| child.offset() < rng.end);

    for child in in_range {
        crate::log_debug_ct!("found comment for docs: {:?}: {:?}", child.kind(), child.text());

        let group_broken = matcher.process(child.get());
        if !group_broken {
            continue;
        }
        if first_group {
            break;
        }
        // Start over: only a later group may provide the documentation.
        matcher.comments.clear();
    }

    matcher.collect()
}

/// A signal raised by the comment group matcher.
///
/// Each processed syntax node is classified into exactly one of these signals.
/// The enum is a plain, field-less tag, so it is cheap to copy, compare and
/// print for diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CommentGroupSignal {
    /// A hash marker is found.
    Hash,
    /// A space is found.
    Space,
    /// A line comment is found.
    LineComment,
    /// A block comment is found.
    BlockComment,
    /// The comment group should be broken.
    BreakGroup,
}

/// A matcher that groups comments.
///
/// The matcher is fed the children of a syntax node one by one and reports,
/// for each of them, whether it belongs to the current comment group or
/// breaks it.
#[derive(Debug, Clone, Default)]
pub struct CommentGroupMatcher {
    /// The number of newline-containing spaces seen since the last node that
    /// reset the counter. A value above one breaks the group.
    newline_count: u32,
}

impl CommentGroupMatcher {
    /// Forgets the line breaks counted so far. Callers typically do this after
    /// a group has been collected, or when some other child item breaks the
    /// comment group manually.
    pub fn reset(&mut self) {
        self.newline_count = 0;
    }

    /// Classifies one child of some [`SyntaxNode`] and updates the line-break
    /// counter accordingly.
    ///
    /// - A space containing a newline bumps the counter; once the counter
    ///   exceeds one, spaces report [`CommentGroupSignal::BreakGroup`].
    /// - A paragraph break always breaks the group and leaves the counter at
    ///   two, so spaces that follow keep breaking it.
    /// - Hash markers and comments clear the counter and are reported as
    ///   themselves.
    /// - Any other kind clears the counter and breaks the group.
    ///
    /// ## Example
    ///
    /// See [`DocCommentMatcher`] for a real-world example.
    pub fn process(&mut self, n: &SyntaxNode) -> CommentGroupSignal {
        let kind = n.kind();

        // Spaces are the only kind that accumulates into the counter instead
        // of overwriting it, so they are handled up front.
        if matches!(kind, SyntaxKind::Space) {
            if n.text().contains('\n') {
                self.newline_count += 1;
            }

            return if self.newline_count > 1 {
                CommentGroupSignal::BreakGroup
            } else {
                CommentGroupSignal::Space
            };
        }

        // Every remaining kind maps to a signal plus the counter value it
        // leaves behind.
        let (signal, newline_count) = match kind {
            SyntaxKind::Hash => (CommentGroupSignal::Hash, 0),
            SyntaxKind::LineComment => (CommentGroupSignal::LineComment, 0),
            SyntaxKind::BlockComment => (CommentGroupSignal::BlockComment, 0),
            SyntaxKind::Parbreak => (CommentGroupSignal::BreakGroup, 2),
            _ => (CommentGroupSignal::BreakGroup, 0),
        };
        self.newline_count = newline_count;

        signal
    }
}

/// A raw comment, stored exactly as it appears in the syntax node (comment
/// markers included). The markers are only stripped when the comments are
/// collected by [`DocCommentMatcher::collect`].
enum RawComment {
    /// A line comment; its leading slashes are removed on collection.
    Line(EcoString),
    /// A block comment; its `/*` and `*/` delimiters are removed on
    /// collection.
    Block(EcoString),
}

/// A matcher that collects documentation comments.
///
/// Feed it syntax nodes with [`DocCommentMatcher::process`] and turn the
/// buffered comments into a single string with
/// [`DocCommentMatcher::collect`].
#[derive(Default)]
pub struct DocCommentMatcher {
    /// The collected comments, in the order they were processed.
    comments: Vec<RawComment>,
    /// The matcher for grouping comments.
    group_matcher: CommentGroupMatcher,
    /// Whether to strictly match the comment format. In strict mode only line
    /// comments starting with `///` are collected and block comments are
    /// ignored; otherwise every line and block comment is collected.
    strict: bool,
}

impl DocCommentMatcher {
    /// Resets the matcher. This usually happens after a group is collected or
    /// when some other child item is breaking the comment group manually.
    pub fn reset(&mut self) {
        self.comments.clear();
        self.group_matcher.reset();
    }

    /// Processes a child relative to some [`SyntaxNode`].
    pub fn process(&mut self, n: &SyntaxNode) -> bool {
        match self.group_matcher.process(n) {
            CommentGroupSignal::LineComment => {
                let text = n.text();
                if !self.strict || text.starts_with("///") {
                    self.comments.push(RawComment::Line(text.clone()));
                }
            }
            CommentGroupSignal::BlockComment => {
                let text = n.text();
                if !self.strict {
                    self.comments.push(RawComment::Block(text.clone()));
                }
            }
            CommentGroupSignal::BreakGroup => {
                return true;
            }
            CommentGroupSignal::Hash | CommentGroupSignal::Space => {}
        }

        false
    }

    /// Collects the comments and returns the result.
    pub fn collect(&mut self) -> Option<String> {
        let comments = &self.comments;
        if comments.is_empty() {
            return None;
        }

        let comments = comments.iter().map(|comment| match comment {
            RawComment::Line(line) => {
                // strip all slash prefix
                line.trim_start_matches('/')
            }
            RawComment::Block(block) => {
                fn remove_comment(text: &str) -> Option<&str> {
                    let mut text = text.strip_prefix("/*")?.strip_suffix("*/")?.trim();
                    // trip start star
                    if text.starts_with('*') {
                        text = text.strip_prefix('*')?.trim();
                    }
                    Some(text)
                }

                remove_comment(block).unwrap_or(block.as_str())
            }
        });
        let comments = comments.collect::<Vec<_>>();

        let dedent = comments
            .iter()
            .flat_map(|line| {
                let mut chars = line.chars();
                let cnt = chars
                    .by_ref()
                    .peeking_take_while(|c| c.is_whitespace())
                    .count();
                chars.next().map(|_| cnt)
            })
            .min()
            .unwrap_or(0);

        let size_hint = comments.iter().map(|comment| comment.len()).sum::<usize>();
        let mut comments = comments
            .iter()
            .map(|comment| comment.chars().skip(dedent).collect::<String>());

        let res = comments.try_fold(String::with_capacity(size_hint), |mut acc, comment| {
            if !acc.is_empty() {
                acc.push('\n');
            }

            acc.push_str(&comment);
            Some(acc)
        });

        self.comments.clear();
        res
    }
}

// Tests for extracting module-level documentation from Typst sources.
#[cfg(test)]
mod tests {
    use super::*;

    /// Extracts the module-level docs of the detached source `it`.
    /// Panics when the source has no module-level docs.
    fn test(it: &str) -> String {
        find_module_level_docs(&Source::detached(it)).unwrap()
    }

    /// Consecutive `///` lines are joined with a newline.
    #[test]
    fn simple() {
        assert_eq!(
            test(
                r#"/// foo
/// bar
#let main() = printf("hello World")"#
            ),
            "foo\nbar"
        );
    }

    /// The common indentation is removed; when one line has none, the
    /// indentation of the other lines is preserved.
    #[test]
    fn dedent() {
        assert_eq!(
            test(
                r#"/// a
/// b
/// c
#let main() = printf("hello World")"#
            ),
            "a\nb\nc"
        );
        assert_eq!(
            test(
                r#"///a
/// b
/// c
#let main() = printf("hello World")"#
            ),
            "a\n b\n c"
        );
    }

    /// An empty doc line written as `/// ` (with a trailing space) must not
    /// affect the dedent of the other lines. The trailing space in the
    /// fixture is significant.
    #[test]
    fn issue_1687_postive() {
        assert_eq!(
            test(
                r#"/// Description.
/// 
/// Note.
#let main() = printf("hello World")"#
            ),
            "Description.\n\nNote."
        );
    }

    /// Same as above, with the empty doc line written as a bare `///`
    /// (no trailing space).
    #[test]
    fn issue_1687_negative() {
        assert_eq!(
            test(
                r#"/// Description.
///
/// Note.
#let main() = printf("hello World")"#
            ),
            "Description.\n\nNote."
        );
    }
}