swiftide_integrations/treesitter/
outliner.rs

1use anyhow::{Context as _, Result};
2use tree_sitter::{Node, Parser, TreeCursor};
3
4use derive_builder::Builder;
5
6use super::supported_languages::SupportedLanguages;
7
8#[derive(Debug, Builder, Clone)]
9/// Generates a summary of a code file.
10///
11/// It does so by parsing the code file and removing function bodies, leaving only the function
12/// signatures and other top-level declarations along with any comments.
13///
14/// The resulting summary can be used as a context when considering subsets of the code file, or for
15/// determining relevance of the code file to a given task.
16#[builder(setter(into), build_fn(error = "anyhow::Error"))]
17pub struct CodeOutliner {
18    #[builder(setter(custom))]
19    language: SupportedLanguages,
20}
21
22impl CodeOutlinerBuilder {
23    /// Attempts to set the language for the `CodeOutliner`.
24    ///
25    /// # Arguments
26    ///
27    /// * `language` - A value that can be converted into `SupportedLanguages`.
28    ///
29    /// # Returns
30    ///
31    /// * `Result<Self>` - The builder instance with the language set, or an error if the language
32    ///   is not supported.
33    ///
34    /// # Errors
35    /// * If the language is not supported, an error is returned.
36    pub fn try_language(mut self, language: impl TryInto<SupportedLanguages>) -> Result<Self> {
37        self.language = Some(
38            language
39                .try_into()
40                .ok()
41                .context("Treesitter language not supported")?,
42        );
43        Ok(self)
44    }
45}
46
47impl CodeOutliner {
48    /// Creates a new `CodeOutliner` with the specified language
49    ///
50    /// # Arguments
51    ///
52    /// * `language` - The programming language for which the code will be outlined.
53    ///
54    /// # Returns
55    ///
56    /// * `Self` - A new instance of `CodeOutliner`.
57    pub fn new(language: SupportedLanguages) -> Self {
58        Self { language }
59    }
60
61    /// Creates a new builder for `CodeOutliner`.
62    ///
63    /// # Returns
64    ///
65    /// * `CodeOutlinerBuilder` - A new builder instance for `CodeOutliner`.
66    pub fn builder() -> CodeOutlinerBuilder {
67        CodeOutlinerBuilder::default()
68    }
69
70    /// outlines a code file.
71    ///
72    /// # Arguments
73    ///
74    /// * `code` - The source code to be split.
75    ///
76    /// # Returns
77    ///
78    /// * `Result<String>` - A result containing a string, or an error if the code could not be
79    ///   parsed.
80    ///
81    /// # Errors
82    /// * If the code could not be parsed, an error is returned.
83    pub fn outline(&self, code: &str) -> Result<String> {
84        let mut parser = Parser::new();
85        parser.set_language(&self.language.into())?;
86        let tree = parser.parse(code, None).context("No nodes found")?;
87        let root_node = tree.root_node();
88
89        if root_node.has_error() {
90            anyhow::bail!("Root node has invalid syntax");
91        }
92
93        let mut cursor = root_node.walk();
94        let mut summary = String::with_capacity(code.len());
95        let mut last_end = 0;
96        self.outline_node(&mut cursor, code, &mut summary, &mut last_end);
97        Ok(summary)
98    }
99
100    fn is_unneeded_node(&self, node: Node) -> bool {
101        match self.language {
102            SupportedLanguages::Rust | SupportedLanguages::Java => matches!(node.kind(), "block"),
103            SupportedLanguages::Typescript | SupportedLanguages::Javascript => {
104                matches!(node.kind(), "statement_block")
105            }
106            SupportedLanguages::Python => match node.kind() {
107                "block" => {
108                    let parent = node.parent().expect("Python block node has no parent");
109                    parent.kind() == "function_definition"
110                }
111                _ => false,
112            },
113            SupportedLanguages::Ruby => match node.kind() {
114                "body_statement" => {
115                    let parent = node
116                        .parent()
117                        .expect("Ruby body_statement node has no parent");
118                    parent.kind() == "method"
119                }
120                _ => false,
121            },
122            SupportedLanguages::Go => unimplemented!(),
123            SupportedLanguages::Solidity => unimplemented!(),
124            SupportedLanguages::C => unimplemented!(),
125            SupportedLanguages::Cpp => unimplemented!(),
126            SupportedLanguages::Elixir => unimplemented!(),
127            SupportedLanguages::HTML => unimplemented!(),
128            SupportedLanguages::PHP => unimplemented!(),
129        }
130    }
131
132    /// outlines a syntax node
133    ///
134    /// # Arguments
135    ///
136    /// * `node` - The syntax node to be chunked.
137    /// * `source` - The source code as a string.
138    /// * `last_end` - The end byte of the last chunk.
139    ///
140    /// # Returns
141    ///
142    /// * `String` - A summary of the syntax node.
143    fn outline_node(
144        &self,
145        cursor: &mut TreeCursor,
146        source: &str,
147        summary: &mut String,
148        last_end: &mut usize,
149    ) {
150        let node = cursor.node();
151        // If the node is not needed in the summary, skip it and go to the next sibling
152        if self.is_unneeded_node(node) {
153            summary.push_str(&source[*last_end..node.start_byte()]);
154            *last_end = node.end_byte();
155            if cursor.goto_next_sibling() {
156                self.outline_node(cursor, source, summary, last_end);
157            }
158            return;
159        }
160
161        let mut next_cursor = cursor.clone();
162
163        // If the node is a non-leaf, recursively outline its children
164        if next_cursor.goto_first_child() {
165            self.outline_node(&mut next_cursor, source, summary, last_end);
166        // If the node is a leaf, add the text to the summary
167        } else {
168            summary.push_str(&source[*last_end..node.end_byte()]);
169            *last_end = node.end_byte();
170        }
171
172        if cursor.goto_next_sibling() {
173            self.outline_node(cursor, source, summary, last_end);
174        } else {
175            // Done with this node
176        }
177    }
178}
179
#[cfg(test)]
mod tests {
    use super::*;

    /// Outlines `code` as `language`, failing the test if the code cannot be outlined.
    fn outline_as(language: SupportedLanguages, code: &str) -> String {
        CodeOutliner::new(language)
            .outline(code)
            .expect("test fixture should be outlined without errors")
    }

    // Test every supported language.
    // We should strip away all code blocks and leave only imports, comments, function signatures,
    // class, interface and structure definitions and definitions of constants, variables and other
    // members.
    #[test]
    fn test_outline_rust() {
        let code = r#"
use anyhow::{Context as _, Result};
// This is a comment
fn main(a: usize, b: usize) -> usize {
    println!("Hello, world!");
}

pub struct Bla {
    a: usize
}

impl Bla {
    fn ok(&mut self) {
        self.a = 1;
    }
}"#;
        let summary = outline_as(SupportedLanguages::Rust, code);
        assert_eq!(
            summary,
            "\nuse anyhow::{Context as _, Result};\n// This is a comment\nfn main(a: usize, b: usize) -> usize \n\npub struct Bla {\n    a: usize\n}\n\nimpl Bla {\n    fn ok(&mut self) \n}"
        );
    }

    #[test]
    fn test_outline_typescript() {
        let code = r#"
import { Context as _, Result } from 'anyhow';
// This is a comment
function main(a: number, b: number): number {
    console.log("Hello, world!");
}

export class Bla {
    a: number;
}

export interface Bla {
    ok(): void;
}"#;
        let summary = outline_as(SupportedLanguages::Typescript, code);
        assert_eq!(
            summary,
            "\nimport { Context as _, Result } from 'anyhow';\n// This is a comment\nfunction main(a: number, b: number): number \n\nexport class Bla {\n    a: number;\n}\n\nexport interface Bla {\n    ok(): void;\n}"
        );
    }

    #[test]
    fn test_outline_python() {
        let code = r#"
import sys
# This is a comment
def main(a: int, b: int) -> int:
    print("Hello, world!")

class Bla:
    def __init__(self):
        self.a = 1

    def ok(self):
        self.a = 1
"#;
        let summary = outline_as(SupportedLanguages::Python, code);
        assert_eq!(
            summary,
            "\nimport sys\n# This is a comment\ndef main(a: int, b: int) -> int:\n    \n\nclass Bla:\n    def __init__(self):\n        \n\n    def ok(self):\n        "
        );
    }

    #[test]
    fn test_outline_ruby() {
        let code = r#"
require 'anyhow'
# This is a comment
def main(a, b)
    puts "Hello, world!"
end

class Bla
    def ok
        @a = 1
    end
end
"#;
        let summary = outline_as(SupportedLanguages::Ruby, code);
        assert_eq!(
            summary,
            "\nrequire 'anyhow'\n# This is a comment\ndef main(a, b)\n    \nend\n\nclass Bla\n    def ok\n        \n    end\nend"
        );
    }

    #[test]
    fn test_outline_javascript() {
        let code = r#"
import { Context as _, Result } from 'anyhow';
// This is a comment
function main(a, b) {
    console.log("Hello, world!");
}

class Bla {
    constructor() {
        this.a = 1;
    }

    ok() {
        this.a = 1;
    }
}
"#;
        let summary = outline_as(SupportedLanguages::Javascript, code);
        assert_eq!(
            summary,
            "\nimport { Context as _, Result } from 'anyhow';\n// This is a comment\nfunction main(a, b) \n\nclass Bla {\n    constructor() \n\n    ok() \n}"
        );
    }

    #[test]
    fn test_outline_java() {
        let code = r#"
import java.io.PrintStream;
import java.util.Scanner;

public class HelloWorld {
    // This is a comment
    public static void main(String[] args) {
        PrintStream out = System.out;

        out.println("Hello, World!");
    }
}
"#;
        let summary = outline_as(SupportedLanguages::Java, code);
        assert_eq!(
            summary,
            "\nimport java.io.PrintStream;\nimport java.util.Scanner;\n\npublic class HelloWorld {\n    // This is a comment\n    public static void main(String[] args) \n}"
        );
    }

    // Code that does not parse cleanly must be reported as an error, not outlined partially.
    #[test]
    fn test_outline_rejects_invalid_syntax() {
        let outliner = CodeOutliner::new(SupportedLanguages::Rust);
        assert!(outliner.outline("fn main( {").is_err());
    }
}