swiftide_integrations/treesitter/
outliner.rs

1use anyhow::{Context as _, Result};
2use tree_sitter::{Node, Parser, TreeCursor};
3
4use derive_builder::Builder;
5
6use super::supported_languages::SupportedLanguages;
7
8#[derive(Debug, Builder, Clone)]
9/// Generates a summary of a code file.
10///
11/// It does so by parsing the code file and removing function bodies, leaving only the function
12/// signatures and other top-level declarations along with any comments.
13///
14/// The resulting summary can be used as a context when considering subsets of the code file, or for
15/// determining relevance of the code file to a given task.
16#[builder(setter(into), build_fn(error = "anyhow::Error"))]
17pub struct CodeOutliner {
18    #[builder(setter(custom))]
19    language: SupportedLanguages,
20}
21
22impl CodeOutlinerBuilder {
23    /// Attempts to set the language for the `CodeOutliner`.
24    ///
25    /// # Arguments
26    ///
27    /// * `language` - A value that can be converted into `SupportedLanguages`.
28    ///
29    /// # Returns
30    ///
31    /// * `Result<Self>` - The builder instance with the language set, or an error if the language
32    ///   is not supported.
33    ///
34    /// # Errors
35    /// * If the language is not supported, an error is returned.
36    pub fn try_language(mut self, language: impl TryInto<SupportedLanguages>) -> Result<Self> {
37        self.language = Some(
38            language
39                .try_into()
40                .ok()
41                .context("Treesitter language not supported")?,
42        );
43        Ok(self)
44    }
45}
46
47impl CodeOutliner {
48    /// Creates a new `CodeOutliner` with the specified language
49    ///
50    /// # Arguments
51    ///
52    /// * `language` - The programming language for which the code will be outlined.
53    ///
54    /// # Returns
55    ///
56    /// * `Self` - A new instance of `CodeOutliner`.
57    pub fn new(language: SupportedLanguages) -> Self {
58        Self { language }
59    }
60
61    /// Creates a new builder for `CodeOutliner`.
62    ///
63    /// # Returns
64    ///
65    /// * `CodeOutlinerBuilder` - A new builder instance for `CodeOutliner`.
66    pub fn builder() -> CodeOutlinerBuilder {
67        CodeOutlinerBuilder::default()
68    }
69
70    /// outlines a code file.
71    ///
72    /// # Arguments
73    ///
74    /// * `code` - The source code to be split.
75    ///
76    /// # Returns
77    ///
78    /// * `Result<String>` - A result containing a string, or an error if the code could not be
79    ///   parsed.
80    ///
81    /// # Errors
82    /// * If the code could not be parsed, an error is returned.
83    pub fn outline(&self, code: &str) -> Result<String> {
84        let mut parser = Parser::new();
85        parser.set_language(&self.language.into())?;
86        let tree = parser.parse(code, None).context("No nodes found")?;
87        let root_node = tree.root_node();
88
89        if root_node.has_error() {
90            anyhow::bail!("Root node has invalid syntax");
91        }
92
93        let mut cursor = root_node.walk();
94        let mut summary = String::with_capacity(code.len());
95        let mut last_end = 0;
96        self.outline_node(&mut cursor, code, &mut summary, &mut last_end);
97        Ok(summary)
98    }
99
100    fn is_unneeded_node(&self, node: Node) -> bool {
101        match self.language {
102            SupportedLanguages::Rust | SupportedLanguages::Java | SupportedLanguages::CSharp => {
103                matches!(node.kind(), "block")
104            }
105            SupportedLanguages::Typescript | SupportedLanguages::Javascript => {
106                matches!(node.kind(), "statement_block")
107            }
108            SupportedLanguages::Python => match node.kind() {
109                "block" => {
110                    let parent = node.parent().expect("Python block node has no parent");
111                    parent.kind() == "function_definition"
112                }
113                _ => false,
114            },
115            SupportedLanguages::Ruby => match node.kind() {
116                "body_statement" => {
117                    let parent = node
118                        .parent()
119                        .expect("Ruby body_statement node has no parent");
120                    parent.kind() == "method"
121                }
122                _ => false,
123            },
124            SupportedLanguages::Go => unimplemented!(),
125            SupportedLanguages::Solidity => unimplemented!(),
126            SupportedLanguages::C => unimplemented!(),
127            SupportedLanguages::Cpp => unimplemented!(),
128            SupportedLanguages::Elixir => unimplemented!(),
129            SupportedLanguages::HTML => unimplemented!(),
130            SupportedLanguages::PHP => unimplemented!(),
131        }
132    }
133
134    /// outlines a syntax node
135    ///
136    /// # Arguments
137    ///
138    /// * `node` - The syntax node to be chunked.
139    /// * `source` - The source code as a string.
140    /// * `last_end` - The end byte of the last chunk.
141    ///
142    /// # Returns
143    ///
144    /// * `String` - A summary of the syntax node.
145    fn outline_node(
146        &self,
147        cursor: &mut TreeCursor,
148        source: &str,
149        summary: &mut String,
150        last_end: &mut usize,
151    ) {
152        let node = cursor.node();
153        // If the node is not needed in the summary, skip it and go to the next sibling
154        if self.is_unneeded_node(node) {
155            summary.push_str(&source[*last_end..node.start_byte()]);
156            *last_end = node.end_byte();
157            if cursor.goto_next_sibling() {
158                self.outline_node(cursor, source, summary, last_end);
159            }
160            return;
161        }
162
163        let mut next_cursor = cursor.clone();
164
165        // If the node is a non-leaf, recursively outline its children
166        if next_cursor.goto_first_child() {
167            self.outline_node(&mut next_cursor, source, summary, last_end);
168        // If the node is a leaf, add the text to the summary
169        } else {
170            summary.push_str(&source[*last_end..node.end_byte()]);
171            *last_end = node.end_byte();
172        }
173
174        if cursor.goto_next_sibling() {
175            self.outline_node(cursor, source, summary, last_end);
176        } else {
177            // Done with this node
178        }
179    }
180}
181
#[cfg(test)]
mod tests {
    use super::*;

    /// Outlines `code` as `language` and checks that the result equals `expected`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test instead of this helper.
    #[track_caller]
    fn assert_outline(language: SupportedLanguages, code: &str, expected: &str) {
        let summary = CodeOutliner::new(language)
            .outline(code)
            .expect("the fixture should be outlined without errors");
        assert_eq!(summary, expected);
    }

    // Test every supported language.
    // We should strip away all code blocks and leave only imports, comments, function signatures,
    // class, interface and structure definitions and definitions of constants, variables and other
    // members.
    #[test]
    fn test_outline_rust() {
        let code = r#"
use anyhow::{Context as _, Result};
// This is a comment
fn main(a: usize, b: usize) -> usize {
    println!("Hello, world!");
}

pub struct Bla {
    a: usize
}

impl Bla {
    fn ok(&mut self) {
        self.a = 1;
    }
}"#;
        assert_outline(
            SupportedLanguages::Rust,
            code,
            "\nuse anyhow::{Context as _, Result};\n// This is a comment\nfn main(a: usize, b: usize) -> usize \n\npub struct Bla {\n    a: usize\n}\n\nimpl Bla {\n    fn ok(&mut self) \n}",
        );
    }

    #[test]
    fn test_outline_typescript() {
        let code = r#"
import { Context as _, Result } from 'anyhow';
// This is a comment
function main(a: number, b: number): number {
    console.log("Hello, world!");
}

export class Bla {
    a: number;
}

export interface Bla {
    ok(): void;
}"#;
        assert_outline(
            SupportedLanguages::Typescript,
            code,
            "\nimport { Context as _, Result } from 'anyhow';\n// This is a comment\nfunction main(a: number, b: number): number \n\nexport class Bla {\n    a: number;\n}\n\nexport interface Bla {\n    ok(): void;\n}",
        );
    }

    #[test]
    fn test_outline_python() {
        let code = r#"
import sys
# This is a comment
def main(a: int, b: int) -> int:
    print("Hello, world!")

class Bla:
    def __init__(self):
        self.a = 1

    def ok(self):
        self.a = 1
"#;
        assert_outline(
            SupportedLanguages::Python,
            code,
            "\nimport sys\n# This is a comment\ndef main(a: int, b: int) -> int:\n    \n\nclass Bla:\n    def __init__(self):\n        \n\n    def ok(self):\n        ",
        );
    }

    #[test]
    fn test_outline_ruby() {
        let code = r#"
require 'anyhow'
# This is a comment
def main(a, b)
    puts "Hello, world!"
end

class Bla
    def ok
        @a = 1
    end
end
"#;
        assert_outline(
            SupportedLanguages::Ruby,
            code,
            "\nrequire 'anyhow'\n# This is a comment\ndef main(a, b)\n    \nend\n\nclass Bla\n    def ok\n        \n    end\nend",
        );
    }

    #[test]
    fn test_outline_javascript() {
        let code = r#"
import { Context as _, Result } from 'anyhow';
// This is a comment
function main(a, b) {
    console.log("Hello, world!");
}

class Bla {
    constructor() {
        this.a = 1;
    }

    ok() {
        this.a = 1;
    }
}
"#;
        assert_outline(
            SupportedLanguages::Javascript,
            code,
            "\nimport { Context as _, Result } from 'anyhow';\n// This is a comment\nfunction main(a, b) \n\nclass Bla {\n    constructor() \n\n    ok() \n}",
        );
    }

    #[test]
    fn test_outline_java() {
        let code = r#"
import java.io.PrintStream;
import java.util.Scanner;

public class HelloWorld {
    // This is a comment
    public static void main(String[] args) {
        PrintStream out = System.out;

        out.println("Hello, World!");
    }
}
"#;
        assert_outline(
            SupportedLanguages::Java,
            code,
            "\nimport java.io.PrintStream;\nimport java.util.Scanner;\n\npublic class HelloWorld {\n    // This is a comment\n    public static void main(String[] args) \n}",
        );
    }
}