Skip to main content

graphy_parser/
helpers.rs

1use graphy_core::Span;
2use tree_sitter::Node;
3
4pub fn node_text(node: &Node, source: &[u8]) -> String {
5    node.utf8_text(source).unwrap_or("").to_string()
6}
7
8pub fn node_span(node: &Node) -> Span {
9    let start = node.start_position();
10    let end = node.end_position();
11    Span::new(
12        start.row as u32,
13        start.column as u32,
14        end.row as u32,
15        end.column as u32,
16    )
17}
18
/// Strip comment markers from a captured doc comment and return its text.
///
/// Every line is trimmed and cleaned on its own; lines that end up empty are
/// dropped and the remaining lines are joined with `\n`.
///
/// Recognised markers:
///
/// - line comments: `///`, `//!`, `//`
/// - hash comments: one leading `#`
/// - block comments: the openers `/**`, `/*!` and `/*`, a closing `*/` at the
///   end of a line, and the leading `*` of continuation lines
///
/// Lines that carry none of these markers are kept as they are (trimmed).
pub fn clean_doc_comment(text: &str) -> String {
    text.lines()
        .map(strip_comment_markers)
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join("\n")
}

/// Remove the comment markers from a single line of a doc comment.
fn strip_comment_markers(line: &str) -> &str {
    let line = line.trim();

    // Line and hash comments. The longer prefixes are tried first so that
    // `///` and `//!` are not mistaken for `//` followed by text. These lines
    // return early: a `*/` inside a line comment is content, not a closer.
    let line_comment = line
        .strip_prefix("///")
        .or_else(|| line.strip_prefix("//!"))
        .or_else(|| line.strip_prefix("//"))
        .or_else(|| line.strip_prefix('#'));
    if let Some(rest) = line_comment {
        return rest.trim();
    }

    // Block comments. Drop a closing `*/` first so that one-line comments
    // (`/** text */`) and a closer sharing the last content line
    // (` * text */`) do not leak the marker into the output. A lone `*/`
    // becomes empty here and is filtered out by the caller.
    let line = line.strip_suffix("*/").map_or(line, str::trim_end);

    let after_opener = line
        .strip_prefix("/**")
        .or_else(|| line.strip_prefix("/*!"))
        .or_else(|| line.strip_prefix("/*"));
    if let Some(rest) = after_opener {
        return rest.trim();
    }

    // A bare `*` is an empty continuation line.
    if line == "*" {
        return "";
    }

    // Continuation line (` * text`). A `*` that is not followed by a space is
    // left alone because it may be content (for example Markdown emphasis).
    match line.strip_prefix("* ") {
        Some(rest) => rest.trim(),
        None => line,
    }
}
48
/// Structural, language-agnostic test for "noise" call names.
///
/// A call name is considered noise when it looks like a method invoked on a
/// local value (`result.map`, `vec.push`, `self.method`) rather than a call to
/// a free function or to something reached through a module/type path.
///
/// Returns `true` when the call should be skipped.
///
/// Rules, applied in order:
///
/// 1. Names containing `::` are path-qualified → keep.
/// 2. Names without a `.` are bare function calls → keep.
/// 3. Names whose first segment is `self`, `this`, `super` or `cls` are
///    instance/class-local calls → skip, however long the chain is.
/// 4. Names with three or more dot-separated segments (`os.path.join`) are
///    treated as module paths → keep.
/// 5. Otherwise (exactly `receiver.method`): skip when the receiver starts
///    with an ASCII lowercase letter, or is empty; keep in every other case
///    (uppercase, underscore, non-ASCII, ...).
pub fn is_noise_method_call(name: &str) -> bool {
    if name.contains("::") {
        return false;
    }

    // Split at the first `.`: `head` is the root receiver and `tail` holds
    // everything after it.
    let (head, tail) = match name.split_once('.') {
        Some(parts) => parts,
        None => return false,
    };

    match head {
        "self" | "this" | "super" | "cls" => true,
        // Another `.` in the tail means the name has at least three segments.
        _ if tail.contains('.') => false,
        // An empty receiver (name starts with `.`) counts as lowercase.
        _ => head
            .chars()
            .next()
            .map_or(true, |first| first.is_ascii_lowercase()),
    }
}
110
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that cleaning `input` yields exactly `expected`.
    fn assert_cleaned(input: &str, expected: &str) {
        assert_eq!(clean_doc_comment(input), expected, "input: {:?}", input);
    }

    /// Asserts that every name in `names` is classified as `expected`.
    fn assert_noise(names: &[&str], expected: bool) {
        for &name in names {
            assert_eq!(is_noise_method_call(name), expected, "name: {:?}", name);
        }
    }

    #[test]
    fn clean_c_style_doc() {
        assert_cleaned(
            "/**\n * Hello world\n * @param x the value\n */",
            "Hello world\n@param x the value",
        );
    }

    #[test]
    fn clean_rust_doc() {
        assert_cleaned("/// Hello world\n/// Second line", "Hello world\nSecond line");
    }

    #[test]
    fn clean_hash_doc() {
        assert_cleaned("# Hello\n# World", "Hello\nWorld");
    }

    #[test]
    fn noise_method_call_basics() {
        // Method calls on local values and on `self`/`this` are noise.
        assert_noise(
            &[
                "result.map",
                "graph.all_nodes",
                "edge.weight",
                "vec.push",
                "self.method",
                "this.setState",
            ],
            true,
        );

        // Chains of three or more segments are kept: they may be module paths.
        assert_noise(&["node.name.clone", "result.unwrap().method"], false);

        // Bare function calls are kept.
        assert_noise(&["my_function", "resolve_calls"], false);

        // Calls qualified by a type or module are kept.
        assert_noise(
            &[
                "HashMap::new",
                "Database::create",
                "Path.join",
                "React.createElement",
                "JSON.parse",
                "Math.floor",
            ],
            false,
        );

        // Module paths and bare constructors are kept.
        assert_noise(&["os.path.join", "bincode::serialize", "Ok"], false);
    }

    #[test]
    fn clean_doc_comment_empty_and_unicode() {
        // Empty input gives empty output.
        assert_cleaned("", "");

        // Lines holding only whitespace are dropped.
        assert_cleaned("   \n   \n", "");

        // Non-ASCII content after `///` survives cleaning.
        let pi_doc = clean_doc_comment("/// Calculates π (pi) value\n/// Returns: 3.14159…");
        assert!(pi_doc.contains("π"));
        assert!(pi_doc.contains("3.14159…"));

        // Hash-style comments with non-Latin text.
        assert_cleaned(
            "# 日本語のドキュメント\n# 関数の説明",
            "日本語のドキュメント\n関数の説明",
        );

        // Emoji are kept as well.
        let rocket_doc = clean_doc_comment("/// 🚀 Launch the rocket");
        assert!(rocket_doc.contains("🚀"));
    }

    #[test]
    fn noise_method_call_empty_and_edge_cases() {
        // The empty string is not a method call and must not panic.
        assert_noise(&[""], false);

        assert_noise(
            &[
                // Leading dot: the root is empty, which falls back to the
                // lowercase default and is therefore noise.
                ".method",
                // Trailing dot: the receiver `obj` starts with a lowercase letter.
                "obj.",
                // `cls` and `super` are handled like `self`/`this`.
                "cls.create",
                "super.init",
                // One-character lowercase receiver.
                "x.foo",
            ],
            true,
        );

        // `_` is not an ASCII lowercase letter, so an underscore-prefixed
        // receiver is treated like a type/module and kept.
        assert_noise(&["_private.call"], false);
    }
}