similarity_core/
generic_parser_config.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3use std::fs;
4use std::path::Path;
5
6/// Configuration for a generic tree-sitter based parser
7#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct GenericParserConfig {
9    /// Language name (e.g., "python", "rust", "javascript")
10    pub language: String,
11
12    /// Node types that represent functions
13    pub function_nodes: Vec<String>,
14
15    /// Node types that represent types/classes
16    pub type_nodes: Vec<String>,
17
18    /// Field mappings for extracting information from nodes
19    pub field_mappings: FieldMappings,
20
21    /// Node types that should have their text value extracted
22    pub value_nodes: Vec<String>,
23
24    /// Optional: Patterns to identify test functions
25    pub test_patterns: Option<TestPatterns>,
26
27    /// Optional: Custom node type mappings
28    pub custom_mappings: Option<HashMap<String, String>>,
29}
30
31#[derive(Debug, Clone, Serialize, Deserialize)]
32pub struct FieldMappings {
33    /// Field name for function/method name
34    pub name_field: String,
35
36    /// Field name for parameters
37    pub params_field: String,
38
39    /// Field name for function body
40    pub body_field: String,
41
42    /// Optional: Field name for decorators/attributes
43    pub decorator_field: Option<String>,
44
45    /// Optional: Field name for parent class
46    pub class_field: Option<String>,
47}
48
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct TestPatterns {
51    /// Attribute patterns that indicate test functions
52    pub attribute_patterns: Vec<String>,
53
54    /// Name prefixes that indicate test functions
55    pub name_prefixes: Vec<String>,
56
57    /// Name suffixes that indicate test functions
58    pub name_suffixes: Vec<String>,
59}
60
61impl GenericParserConfig {
62    /// Load configuration from a JSON file
63    pub fn from_file<P: AsRef<Path>>(path: P) -> std::io::Result<Self> {
64        let content = fs::read_to_string(path)?;
65        let config = serde_json::from_str(&content)
66            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
67        Ok(config)
68    }
69
70    /// Save configuration to a JSON file
71    pub fn to_file<P: AsRef<Path>>(&self, path: P) -> std::io::Result<()> {
72        let content = serde_json::to_string_pretty(self)
73            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
74        fs::write(path, content)?;
75        Ok(())
76    }
77}
78
79impl Default for GenericParserConfig {
80    fn default() -> Self {
81        Self {
82            language: "unknown".to_string(),
83            function_nodes: vec![],
84            type_nodes: vec![],
85            field_mappings: FieldMappings {
86                name_field: "name".to_string(),
87                params_field: "parameters".to_string(),
88                body_field: "body".to_string(),
89                decorator_field: None,
90                class_field: None,
91            },
92            value_nodes: vec!["identifier".to_string(), "string".to_string()],
93            test_patterns: None,
94            custom_mappings: None,
95        }
96    }
97}
98
99/// Example configurations for common languages
100impl GenericParserConfig {
101    pub fn python() -> Self {
102        Self {
103            language: "python".to_string(),
104            function_nodes: vec!["function_definition".to_string()],
105            type_nodes: vec!["class_definition".to_string()],
106            field_mappings: FieldMappings {
107                name_field: "name".to_string(),
108                params_field: "parameters".to_string(),
109                body_field: "body".to_string(),
110                decorator_field: Some("decorator".to_string()),
111                class_field: Some("class".to_string()),
112            },
113            value_nodes: vec![
114                "identifier".to_string(),
115                "string".to_string(),
116                "integer".to_string(),
117                "float".to_string(),
118                "true".to_string(),
119                "false".to_string(),
120                "none".to_string(),
121            ],
122            test_patterns: Some(TestPatterns {
123                attribute_patterns: vec!["pytest".to_string(), "unittest".to_string()],
124                name_prefixes: vec!["test_".to_string()],
125                name_suffixes: vec!["_test".to_string()],
126            }),
127            custom_mappings: None,
128        }
129    }
130
131    pub fn rust() -> Self {
132        Self {
133            language: "rust".to_string(),
134            function_nodes: vec!["function_item".to_string()],
135            type_nodes: vec!["struct_item".to_string(), "impl_item".to_string()],
136            field_mappings: FieldMappings {
137                name_field: "name".to_string(),
138                params_field: "parameters".to_string(),
139                body_field: "body".to_string(),
140                decorator_field: Some("attribute_item".to_string()),
141                class_field: None,
142            },
143            value_nodes: vec![
144                "identifier".to_string(),
145                "string_literal".to_string(),
146                "integer_literal".to_string(),
147                "float_literal".to_string(),
148                "boolean_literal".to_string(),
149            ],
150            test_patterns: Some(TestPatterns {
151                attribute_patterns: vec!["#[test]".to_string(), "#[cfg(test)]".to_string()],
152                name_prefixes: vec!["test_".to_string()],
153                name_suffixes: vec![],
154            }),
155            custom_mappings: None,
156        }
157    }
158
159    pub fn javascript() -> Self {
160        Self {
161            language: "javascript".to_string(),
162            function_nodes: vec![
163                "function_declaration".to_string(),
164                "arrow_function".to_string(),
165                "function_expression".to_string(),
166                "method_definition".to_string(),
167            ],
168            type_nodes: vec!["class_declaration".to_string()],
169            field_mappings: FieldMappings {
170                name_field: "name".to_string(),
171                params_field: "parameters".to_string(),
172                body_field: "body".to_string(),
173                decorator_field: None,
174                class_field: None,
175            },
176            value_nodes: vec![
177                "identifier".to_string(),
178                "string".to_string(),
179                "number".to_string(),
180                "true".to_string(),
181                "false".to_string(),
182                "null".to_string(),
183                "undefined".to_string(),
184            ],
185            test_patterns: Some(TestPatterns {
186                attribute_patterns: vec![],
187                name_prefixes: vec!["test".to_string()],
188                name_suffixes: vec![".test".to_string(), ".spec".to_string()],
189            }),
190            custom_mappings: None,
191        }
192    }
193
194    pub fn go() -> Self {
195        Self {
196            language: "go".to_string(),
197            function_nodes: vec![
198                "function_declaration".to_string(),
199                "method_declaration".to_string(),
200            ],
201            type_nodes: vec![
202                "type_declaration".to_string(),
203                "struct_type".to_string(),
204                "interface_type".to_string(),
205            ],
206            field_mappings: FieldMappings {
207                name_field: "name".to_string(),
208                params_field: "parameters".to_string(),
209                body_field: "body".to_string(),
210                decorator_field: None,
211                class_field: None,
212            },
213            value_nodes: vec![
214                "identifier".to_string(),
215                "interpreted_string_literal".to_string(),
216                "raw_string_literal".to_string(),
217                "int_literal".to_string(),
218                "float_literal".to_string(),
219                "true".to_string(),
220                "false".to_string(),
221                "nil".to_string(),
222            ],
223            test_patterns: Some(TestPatterns {
224                attribute_patterns: vec![],
225                name_prefixes: vec!["Test".to_string(), "Benchmark".to_string()],
226                name_suffixes: vec!["_test".to_string()],
227            }),
228            custom_mappings: None,
229        }
230    }
231
232    pub fn java() -> Self {
233        Self {
234            language: "java".to_string(),
235            function_nodes: vec![
236                "method_declaration".to_string(),
237                "constructor_declaration".to_string(),
238            ],
239            type_nodes: vec![
240                "class_declaration".to_string(),
241                "interface_declaration".to_string(),
242                "enum_declaration".to_string(),
243                "annotation_type_declaration".to_string(),
244            ],
245            field_mappings: FieldMappings {
246                name_field: "name".to_string(),
247                params_field: "parameters".to_string(),
248                body_field: "body".to_string(),
249                decorator_field: Some("annotation".to_string()),
250                class_field: None,
251            },
252            value_nodes: vec![
253                "identifier".to_string(),
254                "string_literal".to_string(),
255                "integer_literal".to_string(),
256                "floating_point_literal".to_string(),
257                "true".to_string(),
258                "false".to_string(),
259                "null_literal".to_string(),
260            ],
261            test_patterns: Some(TestPatterns {
262                attribute_patterns: vec!["@Test".to_string(), "@ParameterizedTest".to_string()],
263                name_prefixes: vec!["test".to_string()],
264                name_suffixes: vec!["Test".to_string()],
265            }),
266            custom_mappings: None,
267        }
268    }
269
270    pub fn c() -> Self {
271        Self {
272            language: "c".to_string(),
273            function_nodes: vec!["function_definition".to_string()],
274            type_nodes: vec![
275                "struct_specifier".to_string(),
276                "enum_specifier".to_string(),
277                "union_specifier".to_string(),
278                "type_definition".to_string(),
279            ],
280            field_mappings: FieldMappings {
281                name_field: "name".to_string(),
282                params_field: "parameters".to_string(),
283                body_field: "body".to_string(),
284                decorator_field: None,
285                class_field: None,
286            },
287            value_nodes: vec![
288                "identifier".to_string(),
289                "string_literal".to_string(),
290                "number_literal".to_string(),
291                "true".to_string(),
292                "false".to_string(),
293                "null".to_string(),
294            ],
295            test_patterns: Some(TestPatterns {
296                attribute_patterns: vec![],
297                name_prefixes: vec!["test_".to_string()],
298                name_suffixes: vec!["_test".to_string()],
299            }),
300            custom_mappings: None,
301        }
302    }
303
304    pub fn cpp() -> Self {
305        Self {
306            language: "cpp".to_string(),
307            function_nodes: vec![
308                "function_definition".to_string(),
309                "lambda_expression".to_string(),
310            ],
311            type_nodes: vec![
312                "class_specifier".to_string(),
313                "struct_specifier".to_string(),
314                "enum_specifier".to_string(),
315                "union_specifier".to_string(),
316            ],
317            field_mappings: FieldMappings {
318                name_field: "name".to_string(),
319                params_field: "parameters".to_string(),
320                body_field: "body".to_string(),
321                decorator_field: None,
322                class_field: None,
323            },
324            value_nodes: vec![
325                "identifier".to_string(),
326                "string_literal".to_string(),
327                "number_literal".to_string(),
328                "true".to_string(),
329                "false".to_string(),
330                "nullptr".to_string(),
331            ],
332            test_patterns: Some(TestPatterns {
333                attribute_patterns: vec![],
334                name_prefixes: vec!["test_".to_string(), "Test".to_string()],
335                name_suffixes: vec!["_test".to_string(), "Test".to_string()],
336            }),
337            custom_mappings: None,
338        }
339    }
340
341    pub fn csharp() -> Self {
342        Self {
343            language: "csharp".to_string(),
344            function_nodes: vec![
345                "method_declaration".to_string(),
346                "constructor_declaration".to_string(),
347                "operator_declaration".to_string(),
348                "destructor_declaration".to_string(),
349                "lambda_expression".to_string(),
350            ],
351            type_nodes: vec![
352                "class_declaration".to_string(),
353                "interface_declaration".to_string(),
354                "struct_declaration".to_string(),
355                "enum_declaration".to_string(),
356                "record_declaration".to_string(),
357            ],
358            field_mappings: FieldMappings {
359                name_field: "name".to_string(),
360                params_field: "parameters".to_string(),
361                body_field: "body".to_string(),
362                decorator_field: Some("attribute".to_string()),
363                class_field: None,
364            },
365            value_nodes: vec![
366                "identifier".to_string(),
367                "string_literal".to_string(),
368                "integer_literal".to_string(),
369                "real_literal".to_string(),
370                "true".to_string(),
371                "false".to_string(),
372                "null_literal".to_string(),
373            ],
374            test_patterns: Some(TestPatterns {
375                attribute_patterns: vec![
376                    "[Test]".to_string(),
377                    "[TestMethod]".to_string(),
378                    "[Fact]".to_string(),
379                ],
380                name_prefixes: vec!["Test".to_string()],
381                name_suffixes: vec!["Test".to_string(), "Tests".to_string()],
382            }),
383            custom_mappings: None,
384        }
385    }
386
387    pub fn ruby() -> Self {
388        Self {
389            language: "ruby".to_string(),
390            function_nodes: vec!["method".to_string(), "singleton_method".to_string()],
391            type_nodes: vec!["class".to_string(), "module".to_string()],
392            field_mappings: FieldMappings {
393                name_field: "name".to_string(),
394                params_field: "parameters".to_string(),
395                body_field: "body".to_string(),
396                decorator_field: None,
397                class_field: None,
398            },
399            value_nodes: vec![
400                "identifier".to_string(),
401                "string".to_string(),
402                "integer".to_string(),
403                "float".to_string(),
404                "true".to_string(),
405                "false".to_string(),
406                "nil".to_string(),
407            ],
408            test_patterns: Some(TestPatterns {
409                attribute_patterns: vec![],
410                name_prefixes: vec!["test_".to_string()],
411                name_suffixes: vec!["_test".to_string(), "_spec".to_string()],
412            }),
413            custom_mappings: None,
414        }
415    }
416
417    pub fn php() -> Self {
418        Self {
419            language: "php".to_string(),
420            function_nodes: vec![
421                "function_definition".to_string(),
422                "method_declaration".to_string(),
423            ],
424            type_nodes: vec![
425                "class_declaration".to_string(),
426                "interface_declaration".to_string(),
427                "trait_declaration".to_string(),
428            ],
429            field_mappings: FieldMappings {
430                name_field: "name".to_string(),
431                params_field: "parameters".to_string(),
432                body_field: "body".to_string(),
433                decorator_field: Some("attribute_list".to_string()),
434                class_field: None,
435            },
436            value_nodes: vec![
437                "name".to_string(),
438                "string".to_string(),
439                "integer".to_string(),
440                "float".to_string(),
441                "true".to_string(),
442                "false".to_string(),
443                "null".to_string(),
444            ],
445            test_patterns: Some(TestPatterns {
446                attribute_patterns: vec!["@test".to_string(), "@Test".to_string()],
447                name_prefixes: vec!["test".to_string()],
448                name_suffixes: vec!["Test".to_string()],
449            }),
450            custom_mappings: None,
451        }
452    }
453}
454
#[cfg(test)]
mod tests {
    use super::*;

    /// Every example configuration paired with the language name it must report.
    fn example_configs() -> Vec<(&'static str, GenericParserConfig)> {
        vec![
            ("python", GenericParserConfig::python()),
            ("rust", GenericParserConfig::rust()),
            ("javascript", GenericParserConfig::javascript()),
            ("go", GenericParserConfig::go()),
            ("java", GenericParserConfig::java()),
            ("c", GenericParserConfig::c()),
            ("cpp", GenericParserConfig::cpp()),
            ("csharp", GenericParserConfig::csharp()),
            ("ruby", GenericParserConfig::ruby()),
            ("php", GenericParserConfig::php()),
        ]
    }

    #[test]
    fn test_config_serialization() {
        let mut configs = example_configs();
        configs.push(("unknown", GenericParserConfig::default()));

        for (language, config) in configs {
            let json = serde_json::to_string_pretty(&config).unwrap();
            let deserialized: GenericParserConfig = serde_json::from_str(&json).unwrap();
            assert_eq!(config.language, deserialized.language);

            // The config types do not implement `PartialEq`, so compare the
            // re-serialized form: any field lost or altered by the round trip
            // shows up as a difference in the JSON text.
            let json_again = serde_json::to_string_pretty(&deserialized).unwrap();
            assert_eq!(json, json_again, "round trip changed the {} config", language);
        }
    }

    #[test]
    fn test_config_examples() {
        let go_config = GenericParserConfig::go();
        assert_eq!(go_config.language, "go");
        assert!(go_config.function_nodes.contains(&"function_declaration".to_string()));

        let java_config = GenericParserConfig::java();
        assert_eq!(java_config.language, "java");
        assert!(java_config.function_nodes.contains(&"method_declaration".to_string()));

        let c_config = GenericParserConfig::c();
        assert_eq!(c_config.language, "c");
        assert!(c_config.function_nodes.contains(&"function_definition".to_string()));

        // Every example must carry its own language label and name at least
        // one function node type and one type node type.
        for (language, config) in example_configs() {
            assert_eq!(config.language, language);
            assert!(
                !config.function_nodes.is_empty(),
                "{} config lists no function nodes",
                language
            );
            assert!(
                !config.type_nodes.is_empty(),
                "{} config lists no type nodes",
                language
            );
        }
    }
}