Skip to main content

uira_comment_checker/
detector.rs

1use streaming_iterator::StreamingIterator;
2use tree_sitter::{Parser, QueryCursor};
3
4use crate::languages::{get_comment_query, LanguageRegistry};
5use crate::models::{CommentInfo, CommentType};
6
7pub struct CommentDetector {
8    registry: LanguageRegistry,
9}
10
11impl CommentDetector {
12    pub fn new() -> Self {
13        Self {
14            registry: LanguageRegistry::new(),
15        }
16    }
17
18    pub fn detect(
19        &self,
20        content: &str,
21        file_path: &str,
22        include_docstrings: bool,
23    ) -> Vec<CommentInfo> {
24        let lang_name = match self.registry.get_language_name(file_path) {
25            Some(name) => name,
26            None => return Vec::new(),
27        };
28
29        let language = match self.registry.get_language(lang_name) {
30            Some(lang) => lang,
31            None => return Vec::new(),
32        };
33
34        let mut parser = Parser::new();
35        if parser.set_language(&language).is_err() {
36            return Vec::new();
37        }
38
39        let tree = match parser.parse(content, None) {
40            Some(tree) => tree,
41            None => return Vec::new(),
42        };
43
44        let query = match get_comment_query(lang_name, language) {
45            Some(q) => q,
46            None => return Vec::new(),
47        };
48
49        let mut cursor = QueryCursor::new();
50        let mut matches = cursor.matches(&query, tree.root_node(), content.as_bytes());
51
52        let mut comments = Vec::new();
53        while let Some(m) = matches.next() {
54            for capture in m.captures {
55                let node = capture.node;
56                let text = node.utf8_text(content.as_bytes()).unwrap_or("").to_string();
57                let line_number = node.start_position().row + 1;
58                let node_type = node.kind();
59
60                let comment_type = self.determine_comment_type(&text, node_type);
61                let is_docstring = comment_type == CommentType::Docstring;
62
63                if is_docstring && !include_docstrings {
64                    continue;
65                }
66
67                comments.push(CommentInfo::new(
68                    text,
69                    line_number,
70                    file_path.to_string(),
71                    comment_type,
72                ));
73            }
74        }
75
76        comments
77    }
78
79    fn determine_comment_type(&self, text: &str, node_type: &str) -> CommentType {
80        let stripped = text.trim();
81
82        if node_type == "line_comment" {
83            return CommentType::Line;
84        }
85        if node_type == "block_comment" {
86            return CommentType::Block;
87        }
88
89        if stripped.starts_with("\"\"\"") || stripped.starts_with("'''") {
90            return CommentType::Docstring;
91        }
92
93        if stripped.starts_with("//") || stripped.starts_with("#") {
94            return CommentType::Line;
95        }
96
97        if stripped.starts_with("/*") || stripped.starts_with("<!--") || stripped.starts_with("--")
98        {
99            return CommentType::Block;
100        }
101
102        CommentType::Line
103    }
104}
105
106impl Default for CommentDetector {
107    fn default() -> Self {
108        Self::new()
109    }
110}
111
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a new detector over `source` as if it were stored at `path`,
    /// with docstrings excluded.
    fn detect_without_docstrings(source: &str, path: &str) -> Vec<CommentInfo> {
        CommentDetector::new().detect(source, path, false)
    }

    #[test]
    fn test_detect_python_comments() {
        let source = r#"
# This is a comment
x = 1  # inline comment
"#;
        let found = detect_without_docstrings(source, "test.py");

        assert!(!found.is_empty());
        let has_leading_comment = found
            .iter()
            .any(|comment| comment.text.contains("This is a comment"));
        assert!(has_leading_comment);
    }

    #[test]
    fn test_detect_rust_comments() {
        let source = r#"
// Line comment
/* Block comment */
fn main() {}
"#;
        let found = detect_without_docstrings(source, "test.rs");

        assert!(!found.is_empty());
    }

    #[test]
    fn test_unsupported_language() {
        // An unrecognised extension must produce no comments at all.
        let found = detect_without_docstrings("some content", "test.unknown");

        assert!(found.is_empty());
    }
}
148}