Skip to main content

lex_core/lex/testing/lexplore/
loader.rs

//! File loading, parsing, and tokenization for the Lex test harness
//!
//! This module provides the core loading infrastructure for the Lexplore test harness,
//! handling file discovery, reading, parsing, and tokenization.
//!
//! The Lexplore API returns a `DocumentLoader`, which provides a fluent interface
//! for running transforms on test files.
8
9use crate::lex::ast::elements::{Annotation, Definition, List, Paragraph, Session, Verbatim};
10use crate::lex::ast::Document;
11use crate::lex::loader::DocumentLoader;
12use crate::lex::parsing::parse_document;
13use crate::lex::parsing::ParseError;
14use crate::lex::testing::lexplore::specfile_finder;
15use std::fs;
16
17// Re-export types from specfile_finder for public API
18pub use specfile_finder::{DocumentType, ElementType};
19
20// Parser enum is now defined in crate::lex::pipeline::loader and re-exported from pipeline module
21
/// Errors that can occur when loading element sources.
///
/// Every variant carries a human-readable message describing the failure.
/// The type derives `PartialEq`/`Eq` so that tests can compare errors
/// directly with `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ElementSourceError {
    /// The requested spec file could not be located.
    FileNotFound(String),
    /// A filesystem or spec-lookup operation failed (also the target of the
    /// `std::io::Error` conversion).
    IoError(String),
    /// The source text could not be parsed (target of the `ParseError` conversion).
    ParseError(String),
    /// NOTE(review): never constructed in this module — presumably signals that a
    /// loaded element is not of the expected kind; confirm against callers.
    InvalidElement(String),
}
30
31impl std::fmt::Display for ElementSourceError {
32    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
33        match self {
34            ElementSourceError::FileNotFound(msg) => write!(f, "File not found: {msg}"),
35            ElementSourceError::IoError(msg) => write!(f, "IO error: {msg}"),
36            ElementSourceError::ParseError(msg) => write!(f, "Parse error: {msg}"),
37            ElementSourceError::InvalidElement(msg) => write!(f, "Invalid element: {msg}"),
38        }
39    }
40}
41
42impl std::error::Error for ElementSourceError {}
43
44impl From<std::io::Error> for ElementSourceError {
45    fn from(err: std::io::Error) -> Self {
46        ElementSourceError::IoError(err.to_string())
47    }
48}
49
50impl From<ParseError> for ElementSourceError {
51    fn from(err: ParseError) -> Self {
52        ElementSourceError::ParseError(err.to_string())
53    }
54}
55
56impl From<specfile_finder::SpecFileError> for ElementSourceError {
57    fn from(err: specfile_finder::SpecFileError) -> Self {
58        match err {
59            specfile_finder::SpecFileError::FileNotFound(msg) => {
60                ElementSourceError::FileNotFound(msg)
61            }
62            specfile_finder::SpecFileError::IoError(msg) => ElementSourceError::IoError(msg),
63            specfile_finder::SpecFileError::DuplicateNumber(msg) => {
64                ElementSourceError::IoError(msg)
65            }
66        }
67    }
68}
69
70// ElementLoader has been replaced by DocumentLoader from lex::loader
71// Lexplore methods now return DocumentLoader directly
72
73/// Helper function to load and parse an isolated element file
74///
75/// This function orchestrates:
76/// 1. Path resolution via specfile_finder
77/// 2. File parsing via parsing engine (skipping annotation attachment for annotation elements)
78/// 3. Returns the parsed Document
79///
80/// Used internally by the get_* convenience functions.
81fn load_isolated_element(element_type: ElementType, number: usize) -> Document {
82    let path = specfile_finder::find_element_file(element_type, number)
83        .unwrap_or_else(|e| panic!("Failed to find {element_type:?} #{number}: {e}"));
84    let source = fs::read_to_string(&path)
85        .unwrap_or_else(|e| panic!("Failed to read {}: {}", path.display(), e));
86
87    // For annotation elements, skip annotation attachment so they remain in content tree
88    if matches!(element_type, ElementType::Annotation) {
89        use crate::lex::testing::parse_without_annotation_attachment;
90        parse_without_annotation_attachment(&source).unwrap()
91    } else {
92        parse_document(&source).unwrap()
93    }
94}
95
/// Generates the per-element convenience constructors inside an `impl` block.
///
/// Each `name => Variant, "label";` entry expands to
/// `pub fn name(number: usize) -> DocumentLoader`, which forwards to
/// `Self::load(ElementType::Variant, number)`. The label is only used to build
/// the generated doc comment. A trailing `;` after the last entry is optional.
macro_rules! element_shortcuts {
    (
        $( $method:ident => $kind:ident, $noun:literal );*
        $(;)?
    ) => {
        $(
            #[doc = concat!("Load a ", $noun, " file (returns DocumentLoader for transforms)")]
            pub fn $method(number: usize) -> DocumentLoader {
                Self::load(ElementType::$kind, number)
            }
        )*
    };
}
107
/// Generates the document-collection convenience constructors inside an `impl` block.
///
/// Each `name => Variant, "label";` entry expands to
/// `pub fn name(number: usize) -> DocumentLoader`, which forwards to
/// `Self::load_document(DocumentType::Variant, number)`. The label is only used
/// to build the generated doc comment. A trailing `;` after the last entry is optional.
macro_rules! document_shortcuts {
    (
        $( $method:ident => $kind:ident, $noun:literal );*
        $(;)?
    ) => {
        $(
            #[doc = concat!("Load a ", $noun, " document (returns DocumentLoader for transforms)")]
            pub fn $method(number: usize) -> DocumentLoader {
                Self::load_document(DocumentType::$kind, number)
            }
        )*
    };
}
119
120// ============================================================================
121// FLUENT API - Delegates to specfile_finder for file resolution
122// ============================================================================
123
124/// Interface for loading per-element test sources
125pub struct Lexplore;
126
127impl Lexplore {
128    // ===== Fluent API - returns DocumentLoader =====
129
130    /// Load an element file by type and number
131    ///
132    /// Returns a `DocumentLoader` which provides transform shortcuts.
133    pub fn load(element_type: ElementType, number: usize) -> DocumentLoader {
134        let path = specfile_finder::find_element_file(element_type, number)
135            .unwrap_or_else(|e| panic!("Failed to find {element_type:?} #{number}: {e}"));
136        DocumentLoader::from_path(path)
137            .unwrap_or_else(|e| panic!("Failed to load {element_type:?} #{number}: {e}"))
138    }
139
140    /// Load a document collection file by type and number
141    ///
142    /// Returns a `DocumentLoader` which provides transform shortcuts.
143    pub fn load_document(doc_type: DocumentType, number: usize) -> DocumentLoader {
144        let path = specfile_finder::find_document_file(doc_type, number)
145            .unwrap_or_else(|e| panic!("Failed to find {doc_type:?} #{number}: {e}"));
146        DocumentLoader::from_path(path)
147            .unwrap_or_else(|e| panic!("Failed to load {doc_type:?} #{number}: {e}"))
148    }
149
150    /// Load from an arbitrary file path
151    ///
152    /// Returns a `DocumentLoader` which provides transform shortcuts.
153    pub fn from_path<P: AsRef<std::path::Path>>(path: P) -> DocumentLoader {
154        DocumentLoader::from_path(path).unwrap_or_else(|e| panic!("Failed to load from path: {e}"))
155    }
156
157    // ===== Isolated element loading (returns AST node directly) =====
158
159    /// Load a paragraph element file and return the paragraph directly
160    ///
161    /// # Example
162    /// ```ignore
163    /// let paragraph = Lexplore::get_paragraph(3);
164    /// assert!(paragraph.text().starts_with("Expected"));
165    /// ```
166    pub fn get_paragraph(number: usize) -> &'static Paragraph {
167        let doc = Box::leak(Box::new(load_isolated_element(
168            ElementType::Paragraph,
169            number,
170        )));
171        doc.root.expect_paragraph()
172    }
173
174    /// Load a list element file and return the list directly
175    pub fn get_list(number: usize) -> &'static List {
176        let doc = Box::leak(Box::new(load_isolated_element(ElementType::List, number)));
177        doc.root.expect_list()
178    }
179
180    /// Load a session element file and return the session directly
181    pub fn get_session(number: usize) -> &'static Session {
182        let doc = Box::leak(Box::new(load_isolated_element(
183            ElementType::Session,
184            number,
185        )));
186        doc.root.expect_session()
187    }
188
189    /// Load a definition element file and return the definition directly
190    pub fn get_definition(number: usize) -> &'static Definition {
191        let doc = Box::leak(Box::new(load_isolated_element(
192            ElementType::Definition,
193            number,
194        )));
195        doc.root.expect_definition()
196    }
197
198    /// Load an annotation element file and return the annotation directly
199    pub fn get_annotation(number: usize) -> &'static Annotation {
200        let doc = Box::leak(Box::new(load_isolated_element(
201            ElementType::Annotation,
202            number,
203        )));
204        doc.root.expect_annotation()
205    }
206
207    /// Load a verbatim element file and return the verbatim block directly
208    pub fn get_verbatim(number: usize) -> &'static Verbatim {
209        let doc = Box::leak(Box::new(load_isolated_element(
210            ElementType::Verbatim,
211            number,
212        )));
213        doc.root.expect_verbatim()
214    }
215
216    // ===== Convenience shortcuts for element files (fluent API) =====
217
218    element_shortcuts! {
219        paragraph => Paragraph, "paragraph";
220        list => List, "list";
221        session => Session, "session";
222        definition => Definition, "definition";
223        annotation => Annotation, "annotation";
224        verbatim => Verbatim, "verbatim";
225        document => Document, "document";
226    }
227
228    // ===== Convenience shortcuts for document collections =====
229
230    document_shortcuts! {
231        benchmark => Benchmark, "benchmark";
232        trifecta => Trifecta, "trifecta";
233    }
234
235    // ===== Utility methods =====
236
237    /// List all available numbers for a given element type
238    pub fn list_numbers_for(element_type: ElementType) -> Result<Vec<usize>, ElementSourceError> {
239        Ok(specfile_finder::list_element_numbers(element_type)?)
240    }
241}
242
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lex::ast::traits::Container;
    use crate::lex::lexing::Token;
    use crate::lex::testing::lexplore::extraction::*;
    use crate::lex::testing::workspace_path;

    /// Single-paragraph fixture shared by the path-based loading tests.
    const PARAGRAPH_FIXTURE: &str =
        "comms/specs/elements/paragraph.docs/paragraph-01-flat-oneline.lex";

    // ===== Utility Tests =====

    #[test]
    fn test_list_numbers_for_paragraphs() {
        let numbers = Lexplore::list_numbers_for(ElementType::Paragraph).unwrap();
        assert!(!numbers.is_empty());
        assert!(numbers.contains(&1));
    }

    // ===== Isolated Element Loading Tests =====
    //
    // Each `get_*` accessor is exercised once; every call leaks one parsed
    // document, so duplicate coverage is avoided on purpose.

    #[test]
    fn test_get_paragraph() {
        let paragraph = Lexplore::get_paragraph(1);

        assert!(paragraph_text_starts_with(paragraph, "This is a simple"));
    }

    #[test]
    fn test_get_list() {
        let list = Lexplore::get_list(1);

        assert!(!list.items.is_empty());
    }

    #[test]
    fn test_get_session() {
        let session = Lexplore::get_session(1);

        assert!(!session.label().is_empty());
    }

    #[test]
    fn test_get_definition() {
        let definition = Lexplore::get_definition(1);

        assert!(!definition.label().is_empty());
    }

    #[test]
    fn test_get_annotation() {
        // Smoke test: loading must not panic.
        let _annotation = Lexplore::get_annotation(1);
    }

    #[test]
    fn test_get_verbatim() {
        // Smoke test: loading must not panic.
        let _verbatim = Lexplore::get_verbatim(1);
    }

    // ===== Document Collection Tests =====

    #[test]
    fn test_benchmark_fluent_api() {
        let doc = Lexplore::benchmark(10).parse().unwrap();

        assert!(!doc.root.children.is_empty());
    }

    #[test]
    fn test_trifecta_fluent_api() {
        let doc = Lexplore::trifecta(0).parse().unwrap();

        assert!(!doc.root.children.is_empty());
    }

    #[test]
    fn test_benchmark_source_only() {
        let source = Lexplore::benchmark(10).source();
        assert!(!source.is_empty());
    }

    #[test]
    fn test_trifecta_source_only() {
        let source = Lexplore::trifecta(0).source();
        assert!(!source.is_empty());
    }

    // ===== Tokenization Tests =====

    #[test]
    fn test_tokenize_paragraph() {
        let tokens = Lexplore::paragraph(1).tokenize().unwrap();

        assert!(!tokens.is_empty());
    }

    #[test]
    fn test_tokenize_list() {
        let tokens = Lexplore::list(1).tokenize().unwrap();

        // A list source must contain a dash marker or a numbered marker.
        assert!(tokens
            .iter()
            .any(|(t, _)| matches!(t, Token::Dash | Token::Number(_))));
    }

    #[test]
    fn test_tokenize_benchmark() {
        let tokens = Lexplore::benchmark(10).tokenize().unwrap();

        assert!(!tokens.is_empty());
        assert!(tokens.len() > 10);
    }

    #[test]
    fn test_tokenize_trifecta() {
        let tokens = Lexplore::trifecta(0).tokenize().unwrap();

        assert!(!tokens.is_empty());
        assert!(tokens.iter().any(|(t, _)| matches!(t, Token::Text(_))));
    }

    // ===== Path-based Loading Tests =====

    #[test]
    fn test_from_path_parse() {
        let path = workspace_path(PARAGRAPH_FIXTURE);
        let doc = Lexplore::from_path(path).parse().unwrap();

        let paragraph = doc.root.expect_paragraph();
        assert!(!paragraph.text().is_empty());
    }

    #[test]
    fn test_from_path_tokenize() {
        let path = workspace_path(PARAGRAPH_FIXTURE);
        let tokens = Lexplore::from_path(path).tokenize().unwrap();

        assert!(!tokens.is_empty());
        assert!(tokens.iter().any(|(t, _)| matches!(t, Token::Text(_))));
    }

    #[test]
    fn test_from_path_source() {
        let path = workspace_path(PARAGRAPH_FIXTURE);
        let source = Lexplore::from_path(path).source();

        assert!(!source.is_empty());
    }

    #[test]
    fn test_from_path_with_benchmark() {
        let path = workspace_path("comms/specs/benchmark/010-kitchensink.lex");
        let doc = Lexplore::from_path(path).parse().unwrap();

        assert!(!doc.root.children.is_empty());
    }

    #[test]
    fn test_from_path_with_trifecta() {
        let path = workspace_path("comms/specs/trifecta/000-paragraphs.lex");
        let doc = Lexplore::from_path(path).parse().unwrap();

        assert!(!doc.root.children.is_empty());
    }
}