Skip to main content

lex_core/lex/testing/lexplore/
loader.rs

1//! File loading, parsing, and tokenization for Lex test harness
2//!
3//! This module provides the core loading infrastructure for the Lexplore test harness,
4//! handling file discovery, reading, parsing, and tokenization.
5//!
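//! The fluent `Lexplore` API returns a `DocumentLoader`, which provides a fluent interface
//! for running transforms on test files. The `get_*` helpers are the exception: they parse
//! an isolated element file and return the requested AST node directly.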
8
9use crate::lex::ast::elements::{
10    Annotation, Definition, List, Paragraph, Session, Table, Verbatim,
11};
12use crate::lex::ast::Document;
13use crate::lex::loader::DocumentLoader;
14use crate::lex::parsing::parse_document;
15use crate::lex::parsing::ParseError;
16use crate::lex::testing::lexplore::specfile_finder;
17use std::fs;
18
19// Re-export types from specfile_finder for public API
20pub use specfile_finder::{DocumentType, ElementType};
21
22// Parser enum is now defined in crate::lex::pipeline::loader and re-exported from pipeline module
23
/// Errors that can occur when loading element sources.
///
/// Every variant carries a human-readable message. The type is comparable
/// (`PartialEq`/`Eq`) so tests can assert on a specific failure with `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ElementSourceError {
    /// The requested file could not be located (rendered as "File not found: ...").
    FileNotFound(String),
    /// An I/O failure; also used for spec-file lookup problems that have no
    /// dedicated variant here.
    IoError(String),
    /// The source was read but the parser rejected it.
    ParseError(String),
    /// Rendered as "Invalid element: ...".
    // NOTE(review): never constructed in this file; presumably raised by callers
    // when a document does not contain the expected element. Confirm.
    InvalidElement(String),
}
32
33impl std::fmt::Display for ElementSourceError {
34    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
35        match self {
36            ElementSourceError::FileNotFound(msg) => write!(f, "File not found: {msg}"),
37            ElementSourceError::IoError(msg) => write!(f, "IO error: {msg}"),
38            ElementSourceError::ParseError(msg) => write!(f, "Parse error: {msg}"),
39            ElementSourceError::InvalidElement(msg) => write!(f, "Invalid element: {msg}"),
40        }
41    }
42}
43
// No methods are overridden: the variants only hold message strings, so there is no
// underlying source error to expose beyond what `Debug` and `Display` already print.
impl std::error::Error for ElementSourceError {}
45
46impl From<std::io::Error> for ElementSourceError {
47    fn from(err: std::io::Error) -> Self {
48        ElementSourceError::IoError(err.to_string())
49    }
50}
51
52impl From<ParseError> for ElementSourceError {
53    fn from(err: ParseError) -> Self {
54        ElementSourceError::ParseError(err.to_string())
55    }
56}
57
58impl From<specfile_finder::SpecFileError> for ElementSourceError {
59    fn from(err: specfile_finder::SpecFileError) -> Self {
60        match err {
61            specfile_finder::SpecFileError::FileNotFound(msg) => {
62                ElementSourceError::FileNotFound(msg)
63            }
64            specfile_finder::SpecFileError::IoError(msg) => ElementSourceError::IoError(msg),
65            specfile_finder::SpecFileError::DuplicateNumber(msg) => {
66                ElementSourceError::IoError(msg)
67            }
68        }
69    }
70}
71
72// ElementLoader has been replaced by DocumentLoader from lex::loader
73// Lexplore methods now return DocumentLoader directly
74
75/// Helper function to load and parse an isolated element file
76///
77/// This function orchestrates:
78/// 1. Path resolution via specfile_finder
79/// 2. File parsing via parsing engine (skipping annotation attachment for annotation elements)
80/// 3. Returns the parsed Document
81///
82/// Used internally by the get_* convenience functions.
83fn load_isolated_element(element_type: ElementType, number: usize) -> Document {
84    let path = specfile_finder::find_element_file(element_type, number)
85        .unwrap_or_else(|e| panic!("Failed to find {element_type:?} #{number}: {e}"));
86    let source = fs::read_to_string(&path)
87        .unwrap_or_else(|e| panic!("Failed to read {}: {}", path.display(), e));
88
89    // For annotation elements, skip annotation attachment so they remain in content tree
90    if matches!(element_type, ElementType::Annotation) {
91        use crate::lex::testing::parse_without_annotation_attachment;
92        parse_without_annotation_attachment(&source).unwrap()
93    } else {
94        parse_document(&source).unwrap()
95    }
96}
97
/// Macro to generate element loader shortcuts
///
/// Takes a `;`-separated list of `name => Variant, "label"` entries (a trailing `;` is
/// allowed). For each entry it emits `pub fn name(number: usize) -> DocumentLoader`,
/// which forwards to `Self::load(ElementType::Variant, number)`; `label` is only used
/// to build the generated doc string.
///
/// The expansion refers to `Self::load`, so it must be invoked inside an `impl` block
/// that provides that associated function.
macro_rules! element_shortcuts {
    ($($name:ident => $variant:ident, $label:literal);* $(;)?) => {
        $(
            #[doc = concat!("Load a ", $label, " file (returns DocumentLoader for transforms)")]
            pub fn $name(number: usize) -> DocumentLoader {
                Self::load(ElementType::$variant, number)
            }
        )*
    };
}
109
/// Macro to generate document loader shortcuts
///
/// Takes a `;`-separated list of `name => Variant, "label"` entries (a trailing `;` is
/// allowed). For each entry it emits `pub fn name(number: usize) -> DocumentLoader`,
/// which forwards to `Self::load_document(DocumentType::Variant, number)`; `label` is
/// only used to build the generated doc string.
///
/// The expansion refers to `Self::load_document`, so it must be invoked inside an `impl`
/// block that provides that associated function.
macro_rules! document_shortcuts {
    ($($name:ident => $variant:ident, $label:literal);* $(;)?) => {
        $(
            #[doc = concat!("Load a ", $label, " document (returns DocumentLoader for transforms)")]
            pub fn $name(number: usize) -> DocumentLoader {
                Self::load_document(DocumentType::$variant, number)
            }
        )*
    };
}
121
122// ============================================================================
123// FLUENT API - Delegates to specfile_finder for file resolution
124// ============================================================================
125
126/// Interface for loading per-element test sources
127pub struct Lexplore;
128
129impl Lexplore {
130    // ===== Fluent API - returns DocumentLoader =====
131
132    /// Load an element file by type and number
133    ///
134    /// Returns a `DocumentLoader` which provides transform shortcuts.
135    pub fn load(element_type: ElementType, number: usize) -> DocumentLoader {
136        let path = specfile_finder::find_element_file(element_type, number)
137            .unwrap_or_else(|e| panic!("Failed to find {element_type:?} #{number}: {e}"));
138        DocumentLoader::from_path(path)
139            .unwrap_or_else(|e| panic!("Failed to load {element_type:?} #{number}: {e}"))
140    }
141
142    /// Load a document collection file by type and number
143    ///
144    /// Returns a `DocumentLoader` which provides transform shortcuts.
145    pub fn load_document(doc_type: DocumentType, number: usize) -> DocumentLoader {
146        let path = specfile_finder::find_document_file(doc_type, number)
147            .unwrap_or_else(|e| panic!("Failed to find {doc_type:?} #{number}: {e}"));
148        DocumentLoader::from_path(path)
149            .unwrap_or_else(|e| panic!("Failed to load {doc_type:?} #{number}: {e}"))
150    }
151
152    /// Load from an arbitrary file path
153    ///
154    /// Returns a `DocumentLoader` which provides transform shortcuts.
155    pub fn from_path<P: AsRef<std::path::Path>>(path: P) -> DocumentLoader {
156        DocumentLoader::from_path(path).unwrap_or_else(|e| panic!("Failed to load from path: {e}"))
157    }
158
159    // ===== Isolated element loading (returns AST node directly) =====
160
161    /// Load a paragraph element file and return the paragraph directly
162    ///
163    /// # Example
164    /// ```ignore
165    /// let paragraph = Lexplore::get_paragraph(3);
166    /// assert!(paragraph.text().starts_with("Expected"));
167    /// ```
168    pub fn get_paragraph(number: usize) -> &'static Paragraph {
169        let doc = Box::leak(Box::new(load_isolated_element(
170            ElementType::Paragraph,
171            number,
172        )));
173        doc.root.expect_paragraph()
174    }
175
176    /// Load a list element file and return the list directly
177    pub fn get_list(number: usize) -> &'static List {
178        let doc = Box::leak(Box::new(load_isolated_element(ElementType::List, number)));
179        doc.root.expect_list()
180    }
181
182    /// Load a session element file and return the session directly
183    pub fn get_session(number: usize) -> &'static Session {
184        let doc = Box::leak(Box::new(load_isolated_element(
185            ElementType::Session,
186            number,
187        )));
188        doc.root.expect_session()
189    }
190
191    /// Load a definition element file and return the definition directly
192    pub fn get_definition(number: usize) -> &'static Definition {
193        let doc = Box::leak(Box::new(load_isolated_element(
194            ElementType::Definition,
195            number,
196        )));
197        doc.root.expect_definition()
198    }
199
200    /// Load an annotation element file and return the annotation directly
201    pub fn get_annotation(number: usize) -> &'static Annotation {
202        let doc = Box::leak(Box::new(load_isolated_element(
203            ElementType::Annotation,
204            number,
205        )));
206        doc.root.expect_annotation()
207    }
208
209    /// Load a verbatim element file and return the verbatim block directly
210    pub fn get_verbatim(number: usize) -> &'static Verbatim {
211        let doc = Box::leak(Box::new(load_isolated_element(
212            ElementType::Verbatim,
213            number,
214        )));
215        doc.root.expect_verbatim()
216    }
217
218    /// Load a table element file and return the table directly
219    pub fn get_table(number: usize) -> &'static Table {
220        let doc = Box::leak(Box::new(load_isolated_element(ElementType::Table, number)));
221        doc.root.expect_table()
222    }
223
224    // ===== Convenience shortcuts for element files (fluent API) =====
225
226    element_shortcuts! {
227        paragraph => Paragraph, "paragraph";
228        list => List, "list";
229        session => Session, "session";
230        definition => Definition, "definition";
231        annotation => Annotation, "annotation";
232        verbatim => Verbatim, "verbatim";
233        table => Table, "table";
234        document => Document, "document";
235    }
236
237    // ===== Convenience shortcuts for document collections =====
238
239    document_shortcuts! {
240        benchmark => Benchmark, "benchmark";
241        trifecta => Trifecta, "trifecta";
242    }
243
244    // ===== Utility methods =====
245
246    /// List all available numbers for a given element type
247    pub fn list_numbers_for(element_type: ElementType) -> Result<Vec<usize>, ElementSourceError> {
248        Ok(specfile_finder::list_element_numbers(element_type)?)
249    }
250}
251
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lex::ast::traits::Container;
    use crate::lex::lexing::Token;
    use crate::lex::testing::lexplore::extraction::*;
    use crate::lex::testing::workspace_path;

    /// Workspace-relative path of the fixture shared by the `from_path` tests.
    const PARAGRAPH_01: &str =
        "comms/specs/elements/paragraph.docs/paragraph-01-flat-oneline.lex";

    // Tests for the old direct API (get_source_for, etc.) have been removed.
    // Use the fluent API instead: Lexplore::paragraph(1).parse()

    #[test]
    fn test_list_numbers_for_paragraphs() {
        let numbers = Lexplore::list_numbers_for(ElementType::Paragraph).unwrap();
        assert!(!numbers.is_empty());
        assert!(numbers.contains(&1));
    }

    // ===== Document Collection Tests =====

    #[test]
    fn test_benchmark_fluent_api() {
        let doc = Lexplore::benchmark(10).parse().unwrap();

        assert!(!doc.root.children.is_empty());
    }

    #[test]
    fn test_trifecta_fluent_api() {
        let doc = Lexplore::trifecta(0).parse().unwrap();

        assert!(!doc.root.children.is_empty());
    }

    #[test]
    fn test_benchmark_source_only() {
        let source = Lexplore::benchmark(10).source();
        assert!(!source.is_empty());
    }

    #[test]
    fn test_trifecta_source_only() {
        let source = Lexplore::trifecta(0).source();
        assert!(!source.is_empty());
    }

    // ===== Tokenization Tests =====

    #[test]
    fn test_tokenize_paragraph() {
        let tokens = Lexplore::paragraph(1).tokenize().unwrap();

        assert!(!tokens.is_empty());
    }

    #[test]
    fn test_tokenize_list() {
        let tokens = Lexplore::list(1).tokenize().unwrap();

        // A list fixture must contain at least one dash or number marker token.
        assert!(
            tokens.iter().any(|(t, _)| matches!(t, Token::Dash))
                || tokens.iter().any(|(t, _)| matches!(t, Token::Number(_)))
        );
    }

    #[test]
    fn test_tokenize_benchmark() {
        let tokens = Lexplore::benchmark(10).tokenize().unwrap();

        assert!(!tokens.is_empty());
        assert!(tokens.len() > 10);
    }

    #[test]
    fn test_tokenize_trifecta() {
        let tokens = Lexplore::trifecta(0).tokenize().unwrap();

        assert!(!tokens.is_empty());
        assert!(tokens.iter().any(|(t, _)| matches!(t, Token::Text(_))));
    }

    // ===== Path-based Loading Tests =====

    #[test]
    fn test_from_path_parse() {
        let path = workspace_path(PARAGRAPH_01);
        let doc = Lexplore::from_path(path).parse().unwrap();

        let paragraph = doc.root.expect_paragraph();
        assert!(!paragraph.text().is_empty());
    }

    #[test]
    fn test_from_path_tokenize() {
        let path = workspace_path(PARAGRAPH_01);
        let tokens = Lexplore::from_path(path).tokenize().unwrap();

        assert!(!tokens.is_empty());
        assert!(tokens.iter().any(|(t, _)| matches!(t, Token::Text(_))));
    }

    #[test]
    fn test_from_path_source() {
        let path = workspace_path(PARAGRAPH_01);
        let source = Lexplore::from_path(path).source();

        assert!(!source.is_empty());
    }

    #[test]
    fn test_from_path_with_benchmark() {
        let path = workspace_path("comms/specs/benchmark/010-kitchensink.lex");
        let doc = Lexplore::from_path(path).parse().unwrap();

        assert!(!doc.root.children.is_empty());
    }

    #[test]
    fn test_from_path_with_trifecta() {
        let path = workspace_path("comms/specs/trifecta/000-paragraphs.lex");
        let doc = Lexplore::from_path(path).parse().unwrap();

        assert!(!doc.root.children.is_empty());
    }

    // ===== Isolated Element Loading Tests =====
    //
    // These exercise the `get_*` helpers, which return the AST node directly rather
    // than a `DocumentLoader`.

    #[test]
    fn test_get_paragraph_direct() {
        let paragraph = Lexplore::get_paragraph(1);

        assert!(paragraph_text_starts_with(paragraph, "This is a simple"));
    }

    #[test]
    fn test_get_list_direct() {
        let list = Lexplore::get_list(1);

        assert!(!list.items.is_empty());
    }

    #[test]
    fn test_get_session_direct() {
        let session = Lexplore::get_session(1);

        assert!(!session.label().is_empty());
    }

    #[test]
    fn test_get_definition_direct() {
        let definition = Lexplore::get_definition(1);

        assert!(!definition.label().is_empty());
    }

    #[test]
    fn test_get_annotation_direct() {
        let _annotation = Lexplore::get_annotation(1);

        // Just verify it doesn't panic - annotation was successfully loaded
    }

    #[test]
    fn test_get_verbatim_direct() {
        let _verbatim = Lexplore::get_verbatim(1);

        // Just verify it doesn't panic - verbatim block was successfully loaded
    }
}
474}