uira_comment_checker/
detector.rs1use streaming_iterator::StreamingIterator;
2use tree_sitter::{Parser, QueryCursor};
3
4use crate::languages::{get_comment_query, LanguageRegistry};
5use crate::models::{CommentInfo, CommentType};
6
7pub struct CommentDetector {
8 registry: LanguageRegistry,
9}
10
11impl CommentDetector {
12 pub fn new() -> Self {
13 Self {
14 registry: LanguageRegistry::new(),
15 }
16 }
17
18 pub fn detect(
19 &self,
20 content: &str,
21 file_path: &str,
22 include_docstrings: bool,
23 ) -> Vec<CommentInfo> {
24 let lang_name = match self.registry.get_language_name(file_path) {
25 Some(name) => name,
26 None => return Vec::new(),
27 };
28
29 let language = match self.registry.get_language(lang_name) {
30 Some(lang) => lang,
31 None => return Vec::new(),
32 };
33
34 let mut parser = Parser::new();
35 if parser.set_language(&language).is_err() {
36 return Vec::new();
37 }
38
39 let tree = match parser.parse(content, None) {
40 Some(tree) => tree,
41 None => return Vec::new(),
42 };
43
44 let query = match get_comment_query(lang_name, language) {
45 Some(q) => q,
46 None => return Vec::new(),
47 };
48
49 let mut cursor = QueryCursor::new();
50 let mut matches = cursor.matches(&query, tree.root_node(), content.as_bytes());
51
52 let mut comments = Vec::new();
53 while let Some(m) = matches.next() {
54 for capture in m.captures {
55 let node = capture.node;
56 let text = node.utf8_text(content.as_bytes()).unwrap_or("").to_string();
57 let line_number = node.start_position().row + 1;
58 let node_type = node.kind();
59
60 let comment_type = self.determine_comment_type(&text, node_type);
61 let is_docstring = comment_type == CommentType::Docstring;
62
63 if is_docstring && !include_docstrings {
64 continue;
65 }
66
67 comments.push(CommentInfo::new(
68 text,
69 line_number,
70 file_path.to_string(),
71 comment_type,
72 ));
73 }
74 }
75
76 comments
77 }
78
79 fn determine_comment_type(&self, text: &str, node_type: &str) -> CommentType {
80 let stripped = text.trim();
81
82 if node_type == "line_comment" {
83 return CommentType::Line;
84 }
85 if node_type == "block_comment" {
86 return CommentType::Block;
87 }
88
89 if stripped.starts_with("\"\"\"") || stripped.starts_with("'''") {
90 return CommentType::Docstring;
91 }
92
93 if stripped.starts_with("//") || stripped.starts_with("#") {
94 return CommentType::Line;
95 }
96
97 if stripped.starts_with("/*") || stripped.starts_with("<!--") || stripped.starts_with("--")
98 {
99 return CommentType::Block;
100 }
101
102 CommentType::Line
103 }
104}
105
106impl Default for CommentDetector {
107 fn default() -> Self {
108 Self::new()
109 }
110}
111
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a fresh detector over `source` as if it came from `path`, docstrings excluded.
    fn scan(source: &str, path: &str) -> Vec<CommentInfo> {
        let detector = CommentDetector::new();
        detector.detect(source, path, false)
    }

    /// A Python snippet with a full-line comment and a trailing inline comment.
    const PYTHON_SNIPPET: &str = r#"
# This is a comment
x = 1 # inline comment
"#;

    /// A Rust snippet with one line comment and one block comment.
    const RUST_SNIPPET: &str = r#"
// Line comment
/* Block comment */
fn main() {}
"#;

    #[test]
    fn test_detect_python_comments() {
        let found = scan(PYTHON_SNIPPET, "test.py");
        assert!(!found.is_empty());

        let has_full_line_comment = found
            .iter()
            .any(|comment| comment.text.contains("This is a comment"));
        assert!(has_full_line_comment);
    }

    #[test]
    fn test_detect_rust_comments() {
        let found = scan(RUST_SNIPPET, "test.rs");
        assert!(!found.is_empty());
    }

    #[test]
    fn test_unsupported_language() {
        // An unrecognized extension must yield no comments.
        let found = scan("some content", "test.unknown");
        assert!(found.is_empty());
    }
}