lex_core/lex/testing/lexplore/
loader.rs

//! File loading, parsing, and tokenization for the Lex test harness
2//!
3//! This module provides the core loading infrastructure for the Lexplore test harness,
4//! handling file discovery, reading, parsing, and tokenization.
5//!
6//! The Lexplore API now returns `DocumentLoader` which provides a fluent interface
7//! for running transforms on test files.
8
9use crate::lex::ast::elements::{Annotation, Definition, List, Paragraph, Session, Verbatim};
10use crate::lex::ast::Document;
11use crate::lex::loader::DocumentLoader;
12use crate::lex::parsing::parse_document;
13use crate::lex::parsing::ParseError;
14use crate::lex::testing::lexplore::specfile_finder;
15use std::fs;
16
17// Re-export types from specfile_finder for public API
18pub use specfile_finder::{DocumentType, ElementType};
19
20// Parser enum is now defined in crate::lex::pipeline::loader and re-exported from pipeline module
21
/// Errors that can occur when loading element sources
///
/// Every variant carries a human-readable message string. The `From` impls in
/// this module convert I/O, parse, and spec-file lookup errors into this type.
#[derive(Debug, Clone)]
pub enum ElementSourceError {
    /// A spec file could not be found (converted from `SpecFileError::FileNotFound`).
    FileNotFound(String),
    /// An I/O failure; `SpecFileError::DuplicateNumber` is also mapped here.
    IoError(String),
    /// A parsing failure (converted from `ParseError`).
    ParseError(String),
    /// An invalid element; not constructed anywhere in this module.
    InvalidElement(String),
}
30
31impl std::fmt::Display for ElementSourceError {
32    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
33        match self {
34            ElementSourceError::FileNotFound(msg) => write!(f, "File not found: {msg}"),
35            ElementSourceError::IoError(msg) => write!(f, "IO error: {msg}"),
36            ElementSourceError::ParseError(msg) => write!(f, "Parse error: {msg}"),
37            ElementSourceError::InvalidElement(msg) => write!(f, "Invalid element: {msg}"),
38        }
39    }
40}
41
// Empty impl: only the default `std::error::Error` methods are used, so no
// underlying `source()` is exposed (the original errors are stored as strings).
impl std::error::Error for ElementSourceError {}
43
44impl From<std::io::Error> for ElementSourceError {
45    fn from(err: std::io::Error) -> Self {
46        ElementSourceError::IoError(err.to_string())
47    }
48}
49
50impl From<ParseError> for ElementSourceError {
51    fn from(err: ParseError) -> Self {
52        ElementSourceError::ParseError(err.to_string())
53    }
54}
55
56impl From<specfile_finder::SpecFileError> for ElementSourceError {
57    fn from(err: specfile_finder::SpecFileError) -> Self {
58        match err {
59            specfile_finder::SpecFileError::FileNotFound(msg) => {
60                ElementSourceError::FileNotFound(msg)
61            }
62            specfile_finder::SpecFileError::IoError(msg) => ElementSourceError::IoError(msg),
63            specfile_finder::SpecFileError::DuplicateNumber(msg) => {
64                ElementSourceError::IoError(msg)
65            }
66        }
67    }
68}
69
70// ElementLoader has been replaced by DocumentLoader from lex::loader
71// Lexplore methods now return DocumentLoader directly
72
73/// Helper function to load and parse an isolated element file
74///
75/// This function orchestrates:
76/// 1. Path resolution via specfile_finder
77/// 2. File parsing via parsing engine (skipping annotation attachment for annotation elements)
78/// 3. Returns the parsed Document
79///
80/// Used internally by the get_* convenience functions.
81fn load_isolated_element(element_type: ElementType, number: usize) -> Document {
82    let path = specfile_finder::find_element_file(element_type, number)
83        .unwrap_or_else(|e| panic!("Failed to find {element_type:?} #{number}: {e}"));
84    let source = fs::read_to_string(&path)
85        .unwrap_or_else(|e| panic!("Failed to read {}: {}", path.display(), e));
86
87    // For annotation elements, skip annotation attachment so they remain in content tree
88    if matches!(element_type, ElementType::Annotation) {
89        use crate::lex::testing::parse_without_annotation_attachment;
90        parse_without_annotation_attachment(&source).unwrap()
91    } else {
92        parse_document(&source).unwrap()
93    }
94}
95
/// Macro to generate element loader shortcuts
///
/// Each `name => Variant, "label"` entry expands to
/// `pub fn name(number: usize) -> DocumentLoader`, which forwards to
/// `Self::load(ElementType::Variant, number)`. The label is only used in the
/// generated rustdoc. Entries are separated by `;`, with an optional trailing `;`.
macro_rules! element_shortcuts {
    ($($name:ident => $variant:ident, $label:literal);* $(;)?) => {
        $(
            // Worded without an indefinite article so labels that start with a
            // vowel (e.g. "annotation") still read correctly.
            #[doc = concat!("Load the ", $label, " file with the given number (returns DocumentLoader for transforms)")]
            pub fn $name(number: usize) -> DocumentLoader {
                Self::load(ElementType::$variant, number)
            }
        )*
    };
}
107
/// Macro to generate document loader shortcuts
///
/// Each `name => Variant, "label"` entry becomes
/// `pub fn name(number: usize) -> DocumentLoader`, forwarding to
/// `Self::load_document(DocumentType::Variant, number)`. The label only feeds
/// the generated rustdoc.
macro_rules! document_shortcuts {
    ($($shortcut:ident => $kind:ident, $title:literal);* $(;)?) => {
        $(
            #[doc = concat!("Load a ", $title, " document (returns DocumentLoader for transforms)")]
            pub fn $shortcut(number: usize) -> DocumentLoader {
                Self::load_document(DocumentType::$kind, number)
            }
        )*
    };
}
119
120// ============================================================================
121// FLUENT API - Delegates to specfile_finder for file resolution
122// ============================================================================
123
/// Interface for loading per-element test sources
///
/// `Lexplore` is a unit struct used as a namespace: the functions in its
/// `impl` block are all associated functions (e.g. `Lexplore::load`), so no
/// instance is needed to call them.
pub struct Lexplore;
126
127impl Lexplore {
128    // ===== Fluent API - returns DocumentLoader =====
129
130    /// Load an element file by type and number
131    ///
132    /// Returns a `DocumentLoader` which provides transform shortcuts.
133    pub fn load(element_type: ElementType, number: usize) -> DocumentLoader {
134        let path = specfile_finder::find_element_file(element_type, number)
135            .unwrap_or_else(|e| panic!("Failed to find {element_type:?} #{number}: {e}"));
136        DocumentLoader::from_path(path)
137            .unwrap_or_else(|e| panic!("Failed to load {element_type:?} #{number}: {e}"))
138    }
139
140    /// Load a document collection file by type and number
141    ///
142    /// Returns a `DocumentLoader` which provides transform shortcuts.
143    pub fn load_document(doc_type: DocumentType, number: usize) -> DocumentLoader {
144        let path = specfile_finder::find_document_file(doc_type, number)
145            .unwrap_or_else(|e| panic!("Failed to find {doc_type:?} #{number}: {e}"));
146        DocumentLoader::from_path(path)
147            .unwrap_or_else(|e| panic!("Failed to load {doc_type:?} #{number}: {e}"))
148    }
149
150    /// Load from an arbitrary file path
151    ///
152    /// Returns a `DocumentLoader` which provides transform shortcuts.
153    pub fn from_path<P: AsRef<std::path::Path>>(path: P) -> DocumentLoader {
154        DocumentLoader::from_path(path).unwrap_or_else(|e| panic!("Failed to load from path: {e}"))
155    }
156
157    // ===== Isolated element loading (returns AST node directly) =====
158
159    /// Load a paragraph element file and return the paragraph directly
160    ///
161    /// # Example
162    /// ```ignore
163    /// let paragraph = Lexplore::get_paragraph(3);
164    /// assert!(paragraph.text().starts_with("Expected"));
165    /// ```
166    pub fn get_paragraph(number: usize) -> &'static Paragraph {
167        let doc = Box::leak(Box::new(load_isolated_element(
168            ElementType::Paragraph,
169            number,
170        )));
171        doc.root.expect_paragraph()
172    }
173
174    /// Load a list element file and return the list directly
175    pub fn get_list(number: usize) -> &'static List {
176        let doc = Box::leak(Box::new(load_isolated_element(ElementType::List, number)));
177        doc.root.expect_list()
178    }
179
180    /// Load a session element file and return the session directly
181    pub fn get_session(number: usize) -> &'static Session {
182        let doc = Box::leak(Box::new(load_isolated_element(
183            ElementType::Session,
184            number,
185        )));
186        doc.root.expect_session()
187    }
188
189    /// Load a definition element file and return the definition directly
190    pub fn get_definition(number: usize) -> &'static Definition {
191        let doc = Box::leak(Box::new(load_isolated_element(
192            ElementType::Definition,
193            number,
194        )));
195        doc.root.expect_definition()
196    }
197
198    /// Load an annotation element file and return the annotation directly
199    pub fn get_annotation(number: usize) -> &'static Annotation {
200        let doc = Box::leak(Box::new(load_isolated_element(
201            ElementType::Annotation,
202            number,
203        )));
204        doc.root.expect_annotation()
205    }
206
207    /// Load a verbatim element file and return the verbatim block directly
208    pub fn get_verbatim(number: usize) -> &'static Verbatim {
209        let doc = Box::leak(Box::new(load_isolated_element(
210            ElementType::Verbatim,
211            number,
212        )));
213        doc.root.expect_verbatim()
214    }
215
216    // ===== Convenience shortcuts for element files (fluent API) =====
217
    // Generates one `pub fn <name>(number: usize) -> DocumentLoader` per entry,
    // each forwarding to `Self::load` with the matching `ElementType` variant.
    element_shortcuts! {
        paragraph => Paragraph, "paragraph";
        list => List, "list";
        session => Session, "session";
        definition => Definition, "definition";
        annotation => Annotation, "annotation";
        verbatim => Verbatim, "verbatim";
        document => Document, "document";
    }
227
228    // ===== Convenience shortcuts for document collections =====
229
    // Generates one `pub fn <name>(number: usize) -> DocumentLoader` per entry,
    // each forwarding to `Self::load_document` with the matching `DocumentType` variant.
    document_shortcuts! {
        benchmark => Benchmark, "benchmark";
        trifecta => Trifecta, "trifecta";
    }
234
235    // ===== Utility methods =====
236
237    /// List all available numbers for a given element type
238    pub fn list_numbers_for(element_type: ElementType) -> Result<Vec<usize>, ElementSourceError> {
239        Ok(specfile_finder::list_element_numbers(element_type)?)
240    }
241}
242
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lex::ast::traits::Container;
    use crate::lex::lexing::Token;
    use crate::lex::testing::lexplore::extraction::*;
    use crate::lex::testing::workspace_path;

    // Tests for the old direct API (get_source_for, etc.) were removed together
    // with that API. Use the fluent API instead: Lexplore::paragraph(1).parse()

    /// Spec file shared by the path-based loading tests.
    const FLAT_PARAGRAPH_SPEC: &str =
        "specs/v1/elements/paragraph.docs/paragraph-01-flat-oneline.lex";

    #[test]
    fn test_list_numbers_for_paragraphs() {
        let available = Lexplore::list_numbers_for(ElementType::Paragraph).unwrap();

        assert!(!available.is_empty());
        assert!(available.contains(&1));
    }

    // ===== Direct element access (get_*) =====

    #[test]
    fn test_get_paragraph() {
        assert!(paragraph_text_starts_with(
            Lexplore::get_paragraph(1),
            "This is a simple"
        ));
    }

    #[test]
    fn test_get_list() {
        assert!(!Lexplore::get_list(1).items.is_empty());
    }

    #[test]
    fn test_get_session() {
        assert!(!Lexplore::get_session(1).label().is_empty());
    }

    #[test]
    fn test_get_definition() {
        assert!(!Lexplore::get_definition(1).label().is_empty());
    }

    // ===== Document collections (fluent API) =====

    #[test]
    fn test_benchmark_fluent_api() {
        let parsed = Lexplore::benchmark(10).parse().unwrap();

        assert!(!parsed.root.children.is_empty());
    }

    #[test]
    fn test_trifecta_fluent_api() {
        let parsed = Lexplore::trifecta(0).parse().unwrap();

        assert!(!parsed.root.children.is_empty());
    }

    #[test]
    fn test_benchmark_source_only() {
        let text = Lexplore::benchmark(10).source();

        assert!(!text.is_empty());
    }

    #[test]
    fn test_trifecta_source_only() {
        let text = Lexplore::trifecta(0).source();

        assert!(!text.is_empty());
    }

    // ===== Tokenization =====

    #[test]
    fn test_tokenize_paragraph() {
        let stream = Lexplore::paragraph(1).tokenize().unwrap();

        assert!(!stream.is_empty());
    }

    #[test]
    fn test_tokenize_list() {
        let stream = Lexplore::list(1).tokenize().unwrap();

        // A list must contain at least one marker: a dash or a number.
        let has_marker = stream
            .iter()
            .any(|(t, _)| matches!(t, Token::Dash | Token::Number(_)));
        assert!(has_marker);
    }

    #[test]
    fn test_tokenize_benchmark() {
        let stream = Lexplore::benchmark(10).tokenize().unwrap();

        assert!(!stream.is_empty());
        assert!(stream.len() > 10);
    }

    #[test]
    fn test_tokenize_trifecta() {
        let stream = Lexplore::trifecta(0).tokenize().unwrap();

        assert!(!stream.is_empty());
        assert!(stream.iter().any(|(t, _)| matches!(t, Token::Text(_))));
    }

    // ===== Path-based loading =====

    #[test]
    fn test_from_path_parse() {
        let parsed = Lexplore::from_path(workspace_path(FLAT_PARAGRAPH_SPEC))
            .parse()
            .unwrap();

        let paragraph = parsed.root.expect_paragraph();
        assert!(!paragraph.text().is_empty());
    }

    #[test]
    fn test_from_path_tokenize() {
        let stream = Lexplore::from_path(workspace_path(FLAT_PARAGRAPH_SPEC))
            .tokenize()
            .unwrap();

        assert!(!stream.is_empty());
        assert!(stream.iter().any(|(t, _)| matches!(t, Token::Text(_))));
    }

    #[test]
    fn test_from_path_source() {
        let text = Lexplore::from_path(workspace_path(FLAT_PARAGRAPH_SPEC)).source();

        assert!(!text.is_empty());
    }

    #[test]
    fn test_from_path_with_benchmark() {
        let spec = workspace_path("specs/v1/benchmark/010-kitchensink.lex");
        let parsed = Lexplore::from_path(spec).parse().unwrap();

        assert!(!parsed.root.children.is_empty());
    }

    #[test]
    fn test_from_path_with_trifecta() {
        let spec = workspace_path("specs/v1/trifecta/000-paragraphs.lex");
        let parsed = Lexplore::from_path(spec).parse().unwrap();

        assert!(!parsed.root.children.is_empty());
    }

    // ===== Isolated element loading =====

    #[test]
    fn test_get_paragraph_direct() {
        let first = Lexplore::get_paragraph(1);

        assert!(paragraph_text_starts_with(first, "This is a simple"));
    }

    #[test]
    fn test_get_list_direct() {
        let first = Lexplore::get_list(1);

        assert!(!first.items.is_empty());
    }

    #[test]
    fn test_get_session_direct() {
        let first = Lexplore::get_session(1);

        assert!(!first.label().is_empty());
    }

    #[test]
    fn test_get_definition_direct() {
        let first = Lexplore::get_definition(1);

        assert!(!first.label().is_empty());
    }

    #[test]
    fn test_get_annotation_direct() {
        // Smoke test: loading must not panic.
        let _ = Lexplore::get_annotation(1);
    }

    #[test]
    fn test_get_verbatim_direct() {
        // Smoke test: loading must not panic.
        let _ = Lexplore::get_verbatim(1);
    }
}