oak_core/helpers/
parsing.rs

1//! Parser testing utilities for the Oak parsing framework.
2//!
3//! This module provides comprehensive testing infrastructure for parsers,
4//! including file-based testing, expected output comparison, timeout handling,
5//! and test result serialization.
6
7use crate::{
8    Language, Parser,
9    errors::OakError,
10    helpers::{create_file, json_from_path, source_from_path},
11};
12use serde::{Deserialize, Serialize};
13use serde_json::{Serializer, ser::PrettyFormatter};
14use std::{
15    fmt::Debug,
16    path::{Path, PathBuf},
17    time::Duration,
18};
19use walkdir::WalkDir;
20
/// A parser testing utility that checks a parser against a directory of source files.
///
/// The `ParserTester` walks `root`, selects the files whose extension is listed in
/// `extensions`, parses each of them on a worker thread, and compares the outcome
/// with an expected-result JSON file stored next to the input. Files are processed
/// one after another; `timeout` bounds how long the tester waits for each
/// individual parse result.
#[derive(Debug, Clone)]
pub struct ParserTester {
    /// Directory that is walked to discover test input files.
    root: PathBuf,
    /// File extensions that select test inputs; they are matched against
    /// `Path::extension()`, which never includes the leading dot.
    extensions: Vec<String>,
    /// Maximum time to wait for the parse result of a single file.
    timeout: Duration,
}
31
/// Outcome of parsing one test file, in the form that is stored and compared.
///
/// The same struct describes both sides of a comparison: the value computed
/// from an actual parse and the value deserialized from the expected-result
/// JSON file. Two values are considered matching when all fields are equal.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
pub struct ParserTestExpected {
    /// `true` when the parser's result was `Ok`, `false` when it was `Err`.
    success: bool,
    /// Number of nodes in `ast_structure`, counting the root and every leaf.
    node_count: usize,
    /// Tree built from the parse result; a single childless `Error` leaf when
    /// parsing failed.
    ast_structure: AstNodeData,
    /// String forms of the parser's diagnostics, followed by the result error
    /// when parsing failed.
    errors: Vec<String>,
}
43
/// AST node data structure for parser testing.
///
/// Represents a node in the abstract syntax tree with its kind, children,
/// text length, and leaf status, used for comparing parser output in tests.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
pub struct AstNodeData {
    /// `Debug` rendering of the node's syntax kind (or `"Error"` for the
    /// placeholder node created when parsing fails).
    kind: String,
    /// Child nodes in the order the tree yields them; empty for leaves.
    children: Vec<AstNodeData>,
    /// Copied from the `length` field of the underlying green node or leaf.
    /// NOTE(review): presumably the length of the covered source text; the
    /// unit is not visible from this file.
    text_length: usize,
    /// `true` for leaves and the placeholder error node, `false` for interior nodes.
    is_leaf: bool,
}
55
56impl ParserTester {
57    /// Creates a new parser tester with the specified root directory and default 10-second timeout.
58    pub fn new<P: AsRef<Path>>(root: P) -> Self {
59        Self { root: root.as_ref().to_path_buf(), extensions: vec![], timeout: Duration::from_secs(10) }
60    }
61
62    /// Adds a file extension to test against.
63    pub fn with_extension(mut self, extension: impl ToString) -> Self {
64        self.extensions.push(extension.to_string());
65        self
66    }
67
68    /// Sets the timeout for parsing operations.
69    ///
70    /// # Arguments
71    ///
72    /// * `timeout` - The maximum duration to wait for parsing to complete
73    ///
74    /// # Returns
75    ///
76    /// A new `ParserTester` with the specified timeout
77    pub fn with_timeout(mut self, timeout: Duration) -> Self {
78        self.timeout = timeout;
79        self
80    }
81
82    /// Run tests for the given parser against all files in the root directory with the specified extensions.
83    ///
84    /// # Arguments
85    ///
86    /// * `parser`: The parser to test.
87    ///
88    /// # Examples
89    ///
90    /// ```
91    /// use oak_core::helpers::parsing::ParserTester;
92    ///
93    /// let tester = ParserTester::new("tests/parser").with_extension("tex");
94    /// tester.run_tests(my_parser)?;
95    /// ```
96    pub fn run_tests<L, P>(self, parser: &P) -> Result<(), OakError>
97    where
98        P: Parser<L> + Send + Sync,
99        L: Language + Send + Sync,
100        L::SyntaxKind: Serialize + Debug + Sync + Send,
101    {
102        let test_files = self.find_test_files()?;
103
104        for file_path in test_files {
105            println!("Testing file: {}", file_path.display());
106            self.test_single_file::<L, P>(&file_path, parser)?;
107        }
108
109        Ok(())
110    }
111
112    fn find_test_files(&self) -> Result<Vec<PathBuf>, OakError> {
113        let mut files = Vec::new();
114
115        for entry in WalkDir::new(&self.root) {
116            let entry = entry.unwrap();
117            let path = entry.path();
118
119            if path.is_file() {
120                if let Some(ext) = path.extension() {
121                    if self.extensions.iter().any(|e| e == ext.to_str().unwrap_or("")) {
122                        files.push(path.to_path_buf());
123                    }
124                }
125            }
126        }
127
128        Ok(files)
129    }
130
131    fn test_single_file<L, P>(&self, file_path: &Path, parser: &P) -> Result<(), OakError>
132    where
133        P: Parser<L> + Send + Sync,
134        L: Language + Send + Sync,
135        L::SyntaxKind: Serialize + Debug + Sync + Send,
136    {
137        let source = source_from_path(file_path)?;
138
139        // 在线程中执行解析并构造测试结果,主线程做真实超时控制
140        use std::sync::mpsc;
141        let (tx, rx) = mpsc::channel();
142        let timeout = self.timeout;
143        let file_path_string = file_path.display().to_string();
144
145        std::thread::scope(|s| {
146            s.spawn(move || {
147                let parse_out = parser.parse(&source);
148
149                // Build AST structure if parse succeeded, else create a minimal error node
150                let (success, ast_structure) = match &parse_out.result {
151                    Ok(root) => {
152                        let ast = Self::to_ast::<L::SyntaxKind>(root);
153                        (true, ast)
154                    }
155                    Err(_) => {
156                        let ast = AstNodeData { kind: "Error".to_string(), children: vec![], text_length: 0, is_leaf: true };
157                        (false, ast)
158                    }
159                };
160
161                // Collect error messages
162                let mut error_messages: Vec<String> = parse_out.diagnostics.iter().map(|e| e.to_string()).collect();
163                if let Err(e) = &parse_out.result {
164                    error_messages.push(e.to_string());
165                }
166
167                // Count nodes (including leaves)
168                let node_count = Self::count_nodes(&ast_structure);
169
170                let test_result = ParserTestExpected { success, node_count, ast_structure, errors: error_messages };
171
172                let _ = tx.send(Ok::<ParserTestExpected, OakError>(test_result));
173            });
174
175            match rx.recv_timeout(timeout) {
176                Ok(Ok(test_result)) => {
177                    let expected_file = file_path.with_extension(format!(
178                        "{}.expected.json",
179                        file_path.extension().unwrap_or_default().to_str().unwrap_or("")
180                    ));
181
182                    let force_regenerated = std::env::var("REGENERATE_TESTS").unwrap_or("0".to_string()) == "1";
183
184                    if expected_file.exists() && !force_regenerated {
185                        let expected: ParserTestExpected = json_from_path(&expected_file)?;
186
187                        if test_result != expected {
188                            println!("Test failed for file: {}", file_path.display());
189                            println!("Expected: {:#?}", expected);
190                            println!("Actual: {:#?}", test_result);
191                            return Err(OakError::custom_error("Test results do not match expected results"));
192                        }
193                    }
194                    else {
195                        let file = create_file(&expected_file)?;
196                        let mut writer = Serializer::with_formatter(file, PrettyFormatter::with_indent(b"    "));
197                        test_result.serialize(&mut writer)?;
198
199                        println!("Created expected result file: {}\nNeed rerun", expected_file.display());
200                    }
201
202                    Ok(())
203                }
204                Ok(Err(err)) => Err(err),
205                Err(mpsc::RecvTimeoutError::Timeout) => Err(OakError::custom_error(&format!(
206                    "Parser test timed out after {:?} for file: {}",
207                    timeout, file_path_string
208                ))),
209                Err(mpsc::RecvTimeoutError::Disconnected) => Err(OakError::custom_error(&format!(
210                    "Parser test thread panicked or disconnected for file: {}",
211                    file_path_string
212                ))),
213            }
214        })
215    }
216
217    fn to_ast<K: Copy + Debug + Serialize>(root: &triomphe::Arc<crate::GreenNode<K>>) -> AstNodeData {
218        let kind_str = format!("{:?}", root.kind);
219        let children = root
220            .children
221            .iter()
222            .map(|c| match c {
223                crate::GreenTree::Node(n) => Self::to_ast(n),
224                crate::GreenTree::Leaf(l) => {
225                    AstNodeData { kind: format!("{:?}", l.kind), children: vec![], text_length: l.length, is_leaf: true }
226                }
227            })
228            .collect::<Vec<_>>();
229        AstNodeData { kind: kind_str, children, text_length: root.length, is_leaf: false }
230    }
231
232    fn count_nodes(node: &AstNodeData) -> usize {
233        1 + node.children.iter().map(Self::count_nodes).sum::<usize>()
234    }
235}