1use std::sync::atomic::{AtomicUsize, Ordering};
13
14use crate::types::{CstNode, Language, ListOrdering};
15
16static NEXT_ID: AtomicUsize = AtomicUsize::new(1);
17
18fn fresh_id() -> usize {
19 NEXT_ID.fetch_add(1, Ordering::Relaxed)
20}
21
22pub fn reset_ids() {
24 NEXT_ID.store(1, Ordering::Relaxed);
25}
26
27pub fn parse_to_cst(source: &str, lang: Language) -> Result<CstNode, ParseError> {
29 let ts_lang = get_tree_sitter_language(lang)?;
30 let mut parser = tree_sitter::Parser::new();
31 parser
32 .set_language(&ts_lang)
33 .map_err(|e| ParseError::LanguageError(e.to_string()))?;
34
35 let tree = parser.parse(source, None).ok_or(ParseError::ParseFailed)?;
36
37 let root = tree.root_node();
38 Ok(ts_node_to_cst(&root, source.as_bytes()))
39}
40
41fn ts_node_to_cst(node: &tree_sitter::Node, source: &[u8]) -> CstNode {
43 let kind = node.kind().to_string();
44 let id = fresh_id();
45
46 if node.child_count() == 0 {
47 let value = node.utf8_text(source).unwrap_or("").to_string();
49 return CstNode::Leaf { id, kind, value };
50 }
51
52 let children: Vec<CstNode> = (0..node.child_count())
54 .filter_map(|i| node.child(i))
55 .map(|child| ts_node_to_cst(&child, source))
56 .collect();
57
58 let ordering = classify_ordering(&kind);
61
62 if is_list_node(&kind) || children.len() > 3 {
63 CstNode::List {
64 id,
65 kind,
66 ordering,
67 children,
68 }
69 } else {
70 CstNode::Constructed { id, kind, children }
71 }
72}
73
74fn classify_ordering(kind: &str) -> ListOrdering {
78 match kind {
79 "use_declaration_list"
81 | "import_list"
82 | "import_statement"
83 | "imports"
84 | "import_header" => ListOrdering::Unordered,
85 "class_body" | "enum_body" | "interface_body" | "declaration_list" | "companion_object"
87 | "enum_class_body" | "object_declaration" => ListOrdering::Unordered,
88 "table" | "inline_table" | "document" | "table_array_element" => ListOrdering::Unordered,
90 "block_mapping" | "flow_mapping" => ListOrdering::Unordered,
92 _ => ListOrdering::Ordered,
94 }
95}
96
97fn is_list_node(kind: &str) -> bool {
99 kind.contains("block")
100 || kind.contains("body")
101 || kind.contains("list")
102 || kind.contains("statements")
103 || kind.contains("arguments")
104 || kind.contains("parameters")
105 || kind.ends_with("_list")
106 || kind == "program"
107 || kind == "source_file"
108 || kind == "module"
109 || kind == "translation_unit"
110 || kind == "document"
112 || kind == "table"
113 || kind == "array"
114 || kind == "inline_table"
115 || kind == "stream"
117 || kind == "block_node"
118 || kind == "block_mapping"
119 || kind == "block_sequence"
120 || kind == "flow_mapping"
121 || kind == "flow_sequence"
122}
123
124fn get_tree_sitter_language(lang: Language) -> Result<tree_sitter::Language, ParseError> {
126 let lang_ref = match lang {
127 Language::Rust => tree_sitter_rust::LANGUAGE,
128 Language::JavaScript => tree_sitter_javascript::LANGUAGE,
129 Language::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT,
130 Language::Python => tree_sitter_python::LANGUAGE,
131 Language::Java => tree_sitter_java::LANGUAGE,
132 Language::Go => tree_sitter_go::LANGUAGE,
133 Language::C => tree_sitter_c::LANGUAGE,
134 Language::Cpp => tree_sitter_cpp::LANGUAGE,
135 Language::Kotlin => tree_sitter_kotlin_ng::LANGUAGE,
136 Language::Toml => tree_sitter_toml_ng::LANGUAGE,
137 Language::Yaml => tree_sitter_yaml::LANGUAGE,
138 };
139 Ok(lang_ref.into())
140}
141
142#[derive(Debug)]
143pub enum ParseError {
144 LanguageError(String),
145 ParseFailed,
146}
147
148impl std::fmt::Display for ParseError {
149 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
150 match self {
151 ParseError::LanguageError(s) => write!(f, "language error: {}", s),
152 ParseError::ParseFailed => write!(f, "parse failed"),
153 }
154 }
155}
156
157impl std::error::Error for ParseError {}
158
159#[cfg(test)]
160mod tests {
161 use super::*;
162
163 #[test]
164 fn test_parse_rust() {
165 let src = "fn main() { let x = 1; }";
166 let tree = parse_to_cst(src, Language::Rust).unwrap();
167 assert_eq!(tree.kind(), "source_file");
168 assert!(!tree.children().is_empty());
169 }
170
171 #[test]
172 fn test_parse_javascript() {
173 let src = "function foo() { return 42; }";
174 let tree = parse_to_cst(src, Language::JavaScript).unwrap();
175 assert_eq!(tree.kind(), "program");
176 assert!(!tree.children().is_empty());
177 }
178
179 #[test]
180 fn test_parse_kotlin() {
181 let src = "fun main() { val x = 1 }";
182 let tree = parse_to_cst(src, Language::Kotlin).unwrap();
183 assert_eq!(tree.kind(), "source_file");
184 assert!(!tree.children().is_empty());
185 let reconstructed = tree.to_source();
186 assert!(reconstructed.contains("fun"));
187 assert!(reconstructed.contains("val"));
188 }
189
190 #[test]
191 fn test_parse_toml() {
192 let src = "[package]\nname = \"merge-engine\"\nversion = \"0.1.0\"\n";
193 let tree = parse_to_cst(src, Language::Toml).unwrap();
194 assert_eq!(tree.kind(), "document");
195 assert!(!tree.children().is_empty());
196 let reconstructed = tree.to_source();
197 assert!(reconstructed.contains("package"));
198 assert!(reconstructed.contains("name"));
199 }
200
201 #[test]
202 fn test_parse_yaml() {
203 let src = "name: test\non:\n push:\n branches: [main]\njobs:\n build:\n runs-on: ubuntu-latest\n";
204 let tree = parse_to_cst(src, Language::Yaml).unwrap();
205 assert_eq!(tree.kind(), "stream");
206 let reconstructed = tree.to_source();
207 assert!(reconstructed.contains("ubuntu-latest"));
208 assert!(reconstructed.contains("branches"));
209 }
210
211 #[test]
212 fn test_leaf_reconstruction() {
213 let src = "let x = 1;";
214 let tree = parse_to_cst(src, Language::JavaScript).unwrap();
215 let reconstructed = tree.to_source();
217 assert!(reconstructed.contains("let"));
218 assert!(reconstructed.contains("x"));
219 assert!(reconstructed.contains("1"));
220 }
221}