codemod_core/language.rs
1//! Language adapter trait for tree-sitter integration.
2//!
3//! Each programming language supported by codemod-pilot must implement the
4//! [`LanguageAdapter`] trait. This provides tree-sitter grammar access and
5//! language-specific metadata such as file extensions, statement types, and
6//! identifier node types.
7//!
8//! ## Implementing a new language
9//!
10//! ```rust,ignore
11//! use codemod_core::LanguageAdapter;
12//! use tree_sitter::Language;
13//!
14//! pub struct RustAdapter;
15//!
16//! impl LanguageAdapter for RustAdapter {
17//! fn name(&self) -> &str { "rust" }
18//! fn language(&self) -> Language { tree_sitter_rust::LANGUAGE.into() }
19//! fn file_extensions(&self) -> &[&str] { &["rs"] }
20//! fn statement_node_types(&self) -> &[&str] { &["let_declaration", "expression_statement"] }
21//! fn expression_node_types(&self) -> &[&str] { &["call_expression", "binary_expression"] }
22//! fn identifier_node_types(&self) -> &[&str] { &["identifier", "type_identifier"] }
23//! }
24//! ```
25
26use tree_sitter::Language;
27
28/// Trait for language-specific adapters.
29///
30/// Each supported language implements this trait to provide
31/// tree-sitter grammar and language-specific utilities that the
32/// pattern engine uses for parsing, matching, and transformation.
33pub trait LanguageAdapter: Send + Sync {
34 /// Returns the human-readable language name (e.g., `"rust"`, `"javascript"`).
35 fn name(&self) -> &str;
36
37 /// Returns the [`tree_sitter::Language`] grammar used for parsing.
38 fn language(&self) -> Language;
39
40 /// Returns common file extensions for this language (without the leading dot).
41 ///
42 /// # Examples
43 ///
44 /// Rust: `&["rs"]`
45 /// JavaScript: `&["js", "jsx", "mjs"]`
46 fn file_extensions(&self) -> &[&str];
47
48 /// Returns tree-sitter node types that represent "statements".
49 ///
50 /// These are used during pattern inference to decide structural
51 /// boundaries for extraction.
52 fn statement_node_types(&self) -> &[&str];
53
54 /// Returns tree-sitter node types that represent "expressions".
55 fn expression_node_types(&self) -> &[&str];
56
57 /// Returns tree-sitter node types that represent identifiers
58 /// (variable names, type names, etc.).
59 fn identifier_node_types(&self) -> &[&str];
60
61 /// Checks if a file path is supported by this language adapter.
62 ///
63 /// The default implementation matches the file extension against
64 /// [`Self::file_extensions`].
65 fn supports_file(&self, path: &std::path::Path) -> bool {
66 path.extension()
67 .and_then(|ext| ext.to_str())
68 .map(|ext| self.file_extensions().contains(&ext))
69 .unwrap_or(false)
70 }
71
72 /// Returns true if the given tree-sitter node kind represents a leaf
73 /// token (identifier or literal) that could become a pattern variable.
74 fn is_leaf_variable_candidate(&self, node_kind: &str) -> bool {
75 self.identifier_node_types().contains(&node_kind)
76 }
77
78 /// Returns true if the given tree-sitter node kind is a structural
79 /// container (statement or expression) that should be compared recursively.
80 fn is_structural_node(&self, node_kind: &str) -> bool {
81 self.statement_node_types().contains(&node_kind)
82 || self.expression_node_types().contains(&node_kind)
83 }
84
85 /// Parse source code into a tree-sitter [`Tree`](tree_sitter::Tree).
86 ///
87 /// This is a convenience method that creates a parser, sets the language,
88 /// and parses the given source.
89 fn parse(&self, source: &str) -> std::result::Result<tree_sitter::Tree, String> {
90 let mut parser = tree_sitter::Parser::new();
91 parser
92 .set_language(&self.language())
93 .map_err(|e| format!("Failed to set language: {e}"))?;
94 parser
95 .parse(source, None)
96 .ok_or_else(|| "tree-sitter returned no tree".to_string())
97 }
98}