Skip to main content

lex_core/lex/testing/lexplore/
loader.rs

//! File loading, parsing, and tokenization for the Lex test harness.
//!
//! This module provides the core loading infrastructure for the Lexplore test harness,
//! handling file discovery, reading, parsing, and tokenization.
//!
//! The Lexplore API returns `DocumentLoader`, which provides a fluent interface
//! for running transforms on test files.
8
9use crate::lex::ast::elements::{
10    Annotation, Definition, List, Paragraph, Session, Table, Verbatim,
11};
12use crate::lex::ast::Document;
13use crate::lex::loader::DocumentLoader;
14use crate::lex::parsing::parse_document;
15use crate::lex::parsing::ParseError;
16use crate::lex::testing::lexplore::specfile_finder;
17use std::fs;
18
19// Re-export types from specfile_finder for public API
20pub use specfile_finder::{DocumentType, ElementType};
21
22// Parser enum is now defined in crate::lex::pipeline::loader and re-exported from pipeline module
23
/// Errors that can occur when loading element sources.
///
/// Every variant carries a human-readable message as a `String`. The type is
/// comparable (`PartialEq`/`Eq`) so tests can assert on the exact error returned.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ElementSourceError {
    /// Rendered as `File not found: <message>`.
    FileNotFound(String),
    /// Rendered as `IO error: <message>`.
    IoError(String),
    /// Rendered as `Parse error: <message>`.
    ParseError(String),
    /// Rendered as `Invalid element: <message>`.
    InvalidElement(String),
}
32
33impl std::fmt::Display for ElementSourceError {
34    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
35        match self {
36            ElementSourceError::FileNotFound(msg) => write!(f, "File not found: {msg}"),
37            ElementSourceError::IoError(msg) => write!(f, "IO error: {msg}"),
38            ElementSourceError::ParseError(msg) => write!(f, "Parse error: {msg}"),
39            ElementSourceError::InvalidElement(msg) => write!(f, "Invalid element: {msg}"),
40        }
41    }
42}
43
44impl std::error::Error for ElementSourceError {}
45
46impl From<std::io::Error> for ElementSourceError {
47    fn from(err: std::io::Error) -> Self {
48        ElementSourceError::IoError(err.to_string())
49    }
50}
51
52impl From<ParseError> for ElementSourceError {
53    fn from(err: ParseError) -> Self {
54        ElementSourceError::ParseError(err.to_string())
55    }
56}
57
58impl From<specfile_finder::SpecFileError> for ElementSourceError {
59    fn from(err: specfile_finder::SpecFileError) -> Self {
60        match err {
61            specfile_finder::SpecFileError::FileNotFound(msg) => {
62                ElementSourceError::FileNotFound(msg)
63            }
64            specfile_finder::SpecFileError::IoError(msg) => ElementSourceError::IoError(msg),
65            specfile_finder::SpecFileError::DuplicateNumber(msg) => {
66                ElementSourceError::IoError(msg)
67            }
68        }
69    }
70}
71
72// ElementLoader has been replaced by DocumentLoader from lex::loader
73// Lexplore methods now return DocumentLoader directly
74
75/// Helper function to load and parse an isolated element file
76///
77/// This function orchestrates:
78/// 1. Path resolution via specfile_finder
79/// 2. File parsing via parsing engine (skipping annotation attachment for annotation elements)
80/// 3. Returns the parsed Document
81///
82/// Used internally by the get_* convenience functions.
83fn load_isolated_element(element_type: ElementType, number: usize) -> Document {
84    let path = specfile_finder::find_element_file(element_type, number)
85        .unwrap_or_else(|e| panic!("Failed to find {element_type:?} #{number}: {e}"));
86    let source = fs::read_to_string(&path)
87        .unwrap_or_else(|e| panic!("Failed to read {}: {}", path.display(), e));
88
89    // For annotation elements, skip annotation attachment so they remain in content tree
90    if matches!(element_type, ElementType::Annotation) {
91        use crate::lex::testing::parse_without_annotation_attachment;
92        parse_without_annotation_attachment(&source).unwrap()
93    } else {
94        parse_document(&source).unwrap()
95    }
96}
97
/// Generates one public associated function per `method => Variant, "label"` entry.
///
/// Each generated function takes a file number and forwards to
/// `Self::load(ElementType::Variant, number)`, so the macro has to be invoked
/// inside an `impl` block that provides `load`. Entries are separated by `;`
/// and a trailing `;` is accepted.
macro_rules! element_shortcuts {
    ($($method:ident => $kind:ident, $desc:literal);* $(;)?) => {
        $(
            #[doc = concat!("Load a ", $desc, " file (returns DocumentLoader for transforms)")]
            pub fn $method(number: usize) -> DocumentLoader {
                Self::load(ElementType::$kind, number)
            }
        )*
    };
}
109
/// Generates one public associated function per `method => Variant, "label"` entry.
///
/// Each generated function takes a file number and forwards to
/// `Self::load_document(DocumentType::Variant, number)`, so the macro has to be
/// invoked inside an `impl` block that provides `load_document`. Entries are
/// separated by `;` and a trailing `;` is accepted.
macro_rules! document_shortcuts {
    ($($method:ident => $kind:ident, $desc:literal);* $(;)?) => {
        $(
            #[doc = concat!("Load a ", $desc, " document (returns DocumentLoader for transforms)")]
            pub fn $method(number: usize) -> DocumentLoader {
                Self::load_document(DocumentType::$kind, number)
            }
        )*
    };
}
121
122// ============================================================================
123// FLUENT API - Delegates to specfile_finder for file resolution
124// ============================================================================
125
126/// Interface for loading per-element test sources
127pub struct Lexplore;
128
129impl Lexplore {
130    // ===== Fluent API - returns DocumentLoader =====
131
132    /// Load an element file by type and number
133    ///
134    /// Returns a `DocumentLoader` which provides transform shortcuts.
135    pub fn load(element_type: ElementType, number: usize) -> DocumentLoader {
136        let path = specfile_finder::find_element_file(element_type, number)
137            .unwrap_or_else(|e| panic!("Failed to find {element_type:?} #{number}: {e}"));
138        DocumentLoader::from_path(path)
139            .unwrap_or_else(|e| panic!("Failed to load {element_type:?} #{number}: {e}"))
140    }
141
142    /// Load a document collection file by type and number
143    ///
144    /// Returns a `DocumentLoader` which provides transform shortcuts.
145    pub fn load_document(doc_type: DocumentType, number: usize) -> DocumentLoader {
146        let path = specfile_finder::find_document_file(doc_type, number)
147            .unwrap_or_else(|e| panic!("Failed to find {doc_type:?} #{number}: {e}"));
148        DocumentLoader::from_path(path)
149            .unwrap_or_else(|e| panic!("Failed to load {doc_type:?} #{number}: {e}"))
150    }
151
152    /// Load from an arbitrary file path
153    ///
154    /// Returns a `DocumentLoader` which provides transform shortcuts.
155    pub fn from_path<P: AsRef<std::path::Path>>(path: P) -> DocumentLoader {
156        DocumentLoader::from_path(path).unwrap_or_else(|e| panic!("Failed to load from path: {e}"))
157    }
158
159    // ===== Isolated element loading (returns AST node directly) =====
160
161    /// Load a paragraph element file and return the paragraph directly
162    ///
163    /// # Example
164    /// ```ignore
165    /// let paragraph = Lexplore::get_paragraph(3);
166    /// assert!(paragraph.text().starts_with("Expected"));
167    /// ```
168    pub fn get_paragraph(number: usize) -> &'static Paragraph {
169        let doc = Box::leak(Box::new(load_isolated_element(
170            ElementType::Paragraph,
171            number,
172        )));
173        doc.root.expect_paragraph()
174    }
175
176    /// Load a list element file and return the list directly
177    pub fn get_list(number: usize) -> &'static List {
178        let doc = Box::leak(Box::new(load_isolated_element(ElementType::List, number)));
179        doc.root.expect_list()
180    }
181
182    /// Load a session element file and return the session directly
183    pub fn get_session(number: usize) -> &'static Session {
184        let doc = Box::leak(Box::new(load_isolated_element(
185            ElementType::Session,
186            number,
187        )));
188        doc.root.expect_session()
189    }
190
191    /// Load a definition element file and return the definition directly
192    pub fn get_definition(number: usize) -> &'static Definition {
193        let doc = Box::leak(Box::new(load_isolated_element(
194            ElementType::Definition,
195            number,
196        )));
197        doc.root.expect_definition()
198    }
199
200    /// Load an annotation element file and return the annotation directly
201    pub fn get_annotation(number: usize) -> &'static Annotation {
202        let doc = Box::leak(Box::new(load_isolated_element(
203            ElementType::Annotation,
204            number,
205        )));
206        doc.root.expect_annotation()
207    }
208
209    /// Load a verbatim element file and return the verbatim block directly
210    pub fn get_verbatim(number: usize) -> &'static Verbatim {
211        let doc = Box::leak(Box::new(load_isolated_element(
212            ElementType::Verbatim,
213            number,
214        )));
215        doc.root.expect_verbatim()
216    }
217
218    /// Load a table element file and return the table directly
219    pub fn get_table(number: usize) -> &'static Table {
220        let doc = Box::leak(Box::new(load_isolated_element(ElementType::Table, number)));
221        doc.root.expect_table()
222    }
223
224    // ===== Convenience shortcuts for element files (fluent API) =====
225
226    element_shortcuts! {
227        paragraph => Paragraph, "paragraph";
228        list => List, "list";
229        session => Session, "session";
230        definition => Definition, "definition";
231        annotation => Annotation, "annotation";
232        verbatim => Verbatim, "verbatim";
233        table => Table, "table";
234        document => Document, "document";
235        footnotes => Footnotes, "footnotes";
236    }
237
238    // ===== Convenience shortcuts for document collections =====
239
240    document_shortcuts! {
241        benchmark => Benchmark, "benchmark";
242        trifecta => Trifecta, "trifecta";
243    }
244
245    // ===== Utility methods =====
246
247    /// List all available numbers for a given element type
248    pub fn list_numbers_for(element_type: ElementType) -> Result<Vec<usize>, ElementSourceError> {
249        Ok(specfile_finder::list_element_numbers(element_type)?)
250    }
251}
252
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lex::ast::traits::Container;
    use crate::lex::lexing::Token;
    use crate::lex::testing::lexplore::extraction::*;
    use crate::lex::testing::workspace_path;

    /// Spec file shared by the path-based paragraph loading tests.
    const FLAT_ONELINE_PARAGRAPH: &str =
        "comms/specs/elements/paragraph.docs/paragraph-01-flat-oneline.lex";

    // Tests for the old direct API (get_source_for, get_document_source_for,
    // get_document_ast_for, get_source_from_path, get_ast_from_path,
    // get_tokens_from_path and their must_* variants) were removed together with
    // that API. Use the fluent API instead: Lexplore::paragraph(1).parse()

    #[test]
    fn test_list_numbers_for_paragraphs() {
        let available = Lexplore::list_numbers_for(ElementType::Paragraph).unwrap();
        assert!(!available.is_empty());
        assert!(available.contains(&1));
    }

    // ===== Direct element accessors =====
    //
    // NOTE(review): these four tests repeat the `test_get_*_direct` tests at the
    // end of this module with the same bodies.

    #[test]
    fn test_get_paragraph() {
        assert!(paragraph_text_starts_with(
            Lexplore::get_paragraph(1),
            "This is a simple"
        ));
    }

    #[test]
    fn test_get_list() {
        assert!(!Lexplore::get_list(1).items.is_empty());
    }

    #[test]
    fn test_get_session() {
        assert!(!Lexplore::get_session(1).label().is_empty());
    }

    #[test]
    fn test_get_definition() {
        assert!(!Lexplore::get_definition(1).label().is_empty());
    }

    // ===== Document Collection Tests =====

    #[test]
    fn test_benchmark_fluent_api() {
        let parsed = Lexplore::benchmark(10).parse().unwrap();
        assert!(!parsed.root.children.is_empty());
    }

    #[test]
    fn test_trifecta_fluent_api() {
        let parsed = Lexplore::trifecta(0).parse().unwrap();
        assert!(!parsed.root.children.is_empty());
    }

    #[test]
    fn test_benchmark_source_only() {
        assert!(!Lexplore::benchmark(10).source().is_empty());
    }

    #[test]
    fn test_trifecta_source_only() {
        assert!(!Lexplore::trifecta(0).source().is_empty());
    }

    // ===== Tokenization Tests =====

    #[test]
    fn test_tokenize_paragraph() {
        let stream = Lexplore::paragraph(1).tokenize().unwrap();
        assert!(!stream.is_empty());
    }

    #[test]
    fn test_tokenize_list() {
        let stream = Lexplore::list(1).tokenize().unwrap();

        // A list source must contain at least one marker token: a dash or a number.
        assert!(stream
            .iter()
            .any(|(tok, _)| matches!(tok, Token::Dash | Token::Number(_))));
    }

    #[test]
    fn test_tokenize_benchmark() {
        let stream = Lexplore::benchmark(10).tokenize().unwrap();

        assert!(!stream.is_empty());
        assert!(stream.len() > 10);
    }

    #[test]
    fn test_tokenize_trifecta() {
        let stream = Lexplore::trifecta(0).tokenize().unwrap();

        assert!(!stream.is_empty());
        assert!(stream.iter().any(|(tok, _)| matches!(tok, Token::Text(_))));
    }

    // ===== Path-based Loading Tests =====

    #[test]
    fn test_from_path_parse() {
        let spec = workspace_path(FLAT_ONELINE_PARAGRAPH);
        let parsed = Lexplore::from_path(spec).parse().unwrap();

        let first = parsed.root.expect_paragraph();
        assert!(!first.text().is_empty());
    }

    #[test]
    fn test_from_path_tokenize() {
        let spec = workspace_path(FLAT_ONELINE_PARAGRAPH);
        let stream = Lexplore::from_path(spec).tokenize().unwrap();

        assert!(!stream.is_empty());
        assert!(stream.iter().any(|(tok, _)| matches!(tok, Token::Text(_))));
    }

    #[test]
    fn test_from_path_source() {
        let spec = workspace_path(FLAT_ONELINE_PARAGRAPH);
        assert!(!Lexplore::from_path(spec).source().is_empty());
    }

    #[test]
    fn test_from_path_with_benchmark() {
        let spec = workspace_path("comms/specs/benchmark/010-kitchensink.lex");
        let parsed = Lexplore::from_path(spec).parse().unwrap();

        assert!(!parsed.root.children.is_empty());
    }

    #[test]
    fn test_from_path_with_trifecta() {
        let spec = workspace_path("comms/specs/trifecta/000-paragraphs.lex");
        let parsed = Lexplore::from_path(spec).parse().unwrap();

        assert!(!parsed.root.children.is_empty());
    }

    // ===== Isolated Element Loading Tests =====

    #[test]
    fn test_get_paragraph_direct() {
        assert!(paragraph_text_starts_with(
            Lexplore::get_paragraph(1),
            "This is a simple"
        ));
    }

    #[test]
    fn test_get_list_direct() {
        assert!(!Lexplore::get_list(1).items.is_empty());
    }

    #[test]
    fn test_get_session_direct() {
        assert!(!Lexplore::get_session(1).label().is_empty());
    }

    #[test]
    fn test_get_definition_direct() {
        assert!(!Lexplore::get_definition(1).label().is_empty());
    }

    #[test]
    fn test_get_annotation_direct() {
        // Smoke test: passes as long as loading the annotation does not panic.
        let _ = Lexplore::get_annotation(1);
    }

    #[test]
    fn test_get_verbatim_direct() {
        // Smoke test: passes as long as loading the verbatim block does not panic.
        let _ = Lexplore::get_verbatim(1);
    }
}