oak_core/helpers/
parsing.rs

//! Parser testing utilities for the Oak parsing framework.
//!
//! This module provides comprehensive testing infrastructure for parsers,
//! including file-based testing, expected output comparison, timeout handling,
//! and test result serialization.
6
7use crate::{
8    Language, Parser,
9    errors::OakError,
10    helpers::{create_file, json_from_path, source_from_path},
11};
12use serde::{Deserialize, Serialize};
13use serde_json::{Serializer, ser::PrettyFormatter};
14use std::{
15    fmt::Debug,
16    path::{Path, PathBuf},
17    time::Duration,
18};
19use walkdir::WalkDir;
20
/// A snapshot-style parser testing utility with per-file timeout support.
///
/// The `ParserTester` walks a root directory, picks up every file whose
/// extension was registered with [`ParserTester::with_extension`], parses the
/// files one after another, and compares each result against the expected
/// output stored in a JSON file next to the input.
#[derive(Debug, Clone)]
pub struct ParserTester {
    /// Directory that is searched recursively for test inputs.
    root: PathBuf,
    /// File extensions (without a leading dot) that mark a file as a test input.
    extensions: Vec<String>,
    /// Maximum time to wait for a single file's parse result.
    timeout: Duration,
}
31
32/// Expected parser test results for comparison.
33///
34/// This struct represents the expected output of a parser test, including
35/// success status, node count, AST structure, and any expected errors.
36#[derive(Debug, Serialize, Deserialize, PartialEq)]
37pub struct ParserTestExpected {
38    success: bool,
39    node_count: usize,
40    ast_structure: AstNodeData,
41    errors: Vec<String>,
42}
43
44/// AST node data structure for parser testing.
45///
46/// Represents a node in the abstract kind tree with its kind, children,
47/// text length, and leaf status used for testing parser output.
48#[derive(Debug, Serialize, Deserialize, PartialEq)]
49pub struct AstNodeData {
50    kind: String,
51    children: Vec<AstNodeData>,
52    text_length: usize,
53    is_leaf: bool,
54}
55
56impl ParserTester {
57    /// Creates a new parser tester with the specified root directory and default 10-second timeout.
58    pub fn new<P: AsRef<Path>>(root: P) -> Self {
59        Self { root: root.as_ref().to_path_buf(), extensions: vec![], timeout: Duration::from_secs(10) }
60    }
61
62    /// Adds a file extension to test against.
63    pub fn with_extension(mut self, extension: impl ToString) -> Self {
64        self.extensions.push(extension.to_string());
65        self
66    }
67
68    /// Sets the timeout for parsing operations.
69    ///
70    /// # Arguments
71    ///
72    /// * `timeout` - The maximum duration to wait for parsing to complete
73    ///
74    /// # Returns
75    ///
76    /// A new `ParserTester` with the specified timeout
77    pub fn with_timeout(mut self, timeout: Duration) -> Self {
78        self.timeout = timeout;
79        self
80    }
81
82    /// Run tests for the given parser against all files in the root directory with the specified extensions.
83    ///
84    /// # Arguments
85    ///
86    /// * `parser`: The parser to test.
87    ///
88    /// # Examples
89    ///
90    /// ```ignore
91    /// use oak_core::helpers::parsing::ParserTester;
92    ///
93    /// let tester = ParserTester::new("tests/parser").with_extension("tex");
94    /// tester.run_tests(my_parser)?;
95    /// ```
96    pub fn run_tests<L, P>(self, parser: &P) -> Result<(), OakError>
97    where
98        P: Parser<L> + Send + Sync,
99        L: Language + Send + Sync + 'static,
100        L::ElementType: Serialize + Debug + Sync + Send + Eq,
101    {
102        let test_files = self.find_test_files()?;
103        let force_regenerated = std::env::var("REGENERATE_TESTS").unwrap_or("0".to_string()) == "1";
104        let mut regenerated_any = false;
105
106        for file_path in test_files {
107            println!("Testing file: {}", file_path.display());
108            regenerated_any |= self.test_single_file::<L, P>(&file_path, parser, force_regenerated)?;
109        }
110
111        if regenerated_any && force_regenerated { Err(OakError::test_regenerated(self.root)) } else { Ok(()) }
112    }
113
114    fn find_test_files(&self) -> Result<Vec<PathBuf>, OakError> {
115        let mut files = Vec::new();
116
117        for entry in WalkDir::new(&self.root) {
118            let entry = entry.unwrap();
119            let path = entry.path();
120
121            if path.is_file() {
122                if let Some(ext) = path.extension() {
123                    let ext_str = ext.to_str().unwrap_or("");
124                    if self.extensions.iter().any(|e| e == ext_str) {
125                        // 忽略由 Tester 自身生成的输出文件,防止递归包含
126                        let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
127                        let is_output_file = file_name.ends_with(".parsed.json") || file_name.ends_with(".lexed.json") || file_name.ends_with(".built.json");
128
129                        if !is_output_file {
130                            files.push(path.to_path_buf());
131                        }
132                    }
133                }
134            }
135        }
136
137        Ok(files)
138    }
139
140    fn test_single_file<L, P>(&self, file_path: &Path, parser: &P, force_regenerated: bool) -> Result<bool, OakError>
141    where
142        P: Parser<L> + Send + Sync,
143        L: Language + Send + Sync + 'static,
144        L::ElementType: Serialize + Debug + Sync + Send,
145    {
146        let source = source_from_path(file_path)?;
147
148        // Perform parsing in a thread and construct test results, with main thread handling timeout control
149        use std::sync::mpsc;
150        let (tx, rx) = mpsc::channel();
151        let timeout = self.timeout;
152        let file_path_string = file_path.display().to_string();
153
154        std::thread::scope(|s| {
155            s.spawn(move || {
156                let mut cache = crate::parser::session::ParseSession::<L>::default();
157                let parse_out = parser.parse(&source, &[], &mut cache);
158
159                // Build AST structure if parse succeeded, else create a minimal error node
160                let (success, ast_structure) = match &parse_out.result {
161                    Ok(root) => {
162                        let ast = Self::to_ast::<L>(root);
163                        (true, ast)
164                    }
165                    Err(_) => {
166                        let ast = AstNodeData { kind: "Error".to_string(), children: vec![], text_length: 0, is_leaf: true };
167                        (false, ast)
168                    }
169                };
170
171                // Collect error messages
172                let mut error_messages: Vec<String> = parse_out.diagnostics.iter().map(|e| e.to_string()).collect();
173                if let Err(e) = &parse_out.result {
174                    error_messages.push(e.to_string());
175                }
176
177                // Count nodes (including leaves)
178                let node_count = Self::count_nodes(&ast_structure);
179
180                let test_result = ParserTestExpected { success, node_count, ast_structure, errors: error_messages };
181
182                let _ = tx.send(Ok::<ParserTestExpected, OakError>(test_result));
183            });
184
185            let mut regenerated = false;
186            match rx.recv_timeout(timeout) {
187                Ok(Ok(test_result)) => {
188                    let expected_file = file_path.with_extension(format!("{}.parsed.json", file_path.extension().unwrap_or_default().to_str().unwrap_or("")));
189
190                    if expected_file.exists() && !force_regenerated {
191                        let expected: ParserTestExpected = json_from_path(&expected_file)?;
192
193                        if test_result != expected {
194                            return Err(OakError::test_failure(file_path.to_path_buf(), format!("{:#?}", expected), format!("{:#?}", test_result)));
195                        }
196                    }
197                    else {
198                        let file = create_file(&expected_file)?;
199                        let mut writer = Serializer::with_formatter(file, PrettyFormatter::with_indent(b"    "));
200                        test_result.serialize(&mut writer)?;
201
202                        if force_regenerated {
203                            regenerated = true;
204                        }
205                        else {
206                            return Err(OakError::test_regenerated(expected_file));
207                        }
208                    }
209                }
210                Ok(Err(e)) => return Err(e),
211                Err(mpsc::RecvTimeoutError::Timeout) => {
212                    return Err(OakError::custom_error(format!("Parser test timed out after {:?} for file: {}", timeout, file_path_string)));
213                }
214                Err(mpsc::RecvTimeoutError::Disconnected) => {
215                    return Err(OakError::custom_error("Parser thread disconnected unexpectedly"));
216                }
217            }
218            Ok(regenerated)
219        })
220    }
221
222    fn to_ast<'a, L: Language>(root: &'a crate::GreenNode<'a, L>) -> AstNodeData {
223        let kind_str = format!("{:?}", root.kind);
224        let mut children = Vec::new();
225        let mut leaf_count: usize = 0;
226        let mut leaf_text_length: usize = 0;
227
228        for c in root.children {
229            match c {
230                crate::GreenTree::Node(n) => children.push(Self::to_ast(n)),
231                crate::GreenTree::Leaf(l) => {
232                    leaf_count += 1;
233                    leaf_text_length += l.length as usize;
234                }
235            }
236        }
237
238        if leaf_count > 0 {
239            children.push(AstNodeData { kind: format!("Leaves({})", leaf_count), children: vec![], text_length: leaf_text_length, is_leaf: true });
240        }
241
242        AstNodeData { kind: kind_str, children, text_length: root.text_len as usize, is_leaf: false }
243    }
244
245    fn count_nodes(node: &AstNodeData) -> usize {
246        1 + node.children.iter().map(Self::count_nodes).sum::<usize>()
247    }
248}