lex_core/lex/
loader.rs

1//! Document loading and transform execution
2//!
3//! This module provides [`DocumentLoader`] - the universal API for loading and processing
4//! lex source text. It abstracts over input sources (file vs. string) and provides convenient
5//! shortcuts for common transform operations.
6//!
7//! # Purpose
8//!
9//! `DocumentLoader` serves as the primary entry point for most lex processing tasks:
10//!
11//! - Universal Input: Load from files or strings with the same API
12//! - Transform Shortcuts: Common operations (`.parse()`, `.tokenize()`) built-in
13//! - Custom Transforms: Execute any transform via `.with()`
14//! - Source Access: Retrieve original source text via `.source()`
15//! - Reusable: Create once, run multiple transforms on the same source
16//!
17//! # Relationship to Transform System
18//!
19//! `DocumentLoader` is a convenience layer on top of the [transform system](crate::lex::transforms).
20//! It manages source text loading and delegates to the appropriate transform:
21//!
22//! - `.parse()` → Uses [`STRING_TO_AST`]
23//! - `.tokenize()` → Uses [`LEXING`]
24//! - `.base_tokens()` → Uses [`CORE_TOKENIZATION`]
25//! - `.with(transform)` → Runs any custom transform
26//!
27//! # Common Usage Patterns
28//!
29//! ## Parse a Document
30//!
31//! ```rust
32//! use lex_parser::lex::loader::DocumentLoader;
33//!
34//! let loader = DocumentLoader::from_string("Session:\n    Content\n");
35//! let doc = loader.parse()?;
36//! # Ok::<(), Box<dyn std::error::Error>>(())
37//! ```
38//!
39//! ## Load from File
40//!
41//! ```rust,no_run
42//! use lex_parser::lex::loader::DocumentLoader;
43//!
44//! let loader = DocumentLoader::from_path("document.lex")?;
45//! let doc = loader.parse()?;
46//! # Ok::<(), Box<dyn std::error::Error>>(())
47//! ```
48//!
49//! ## Get Tokens
50//!
51//! ```rust
52//! use lex_parser::lex::loader::DocumentLoader;
53//!
54//! let loader = DocumentLoader::from_string("Hello world\n");
55//! let tokens = loader.tokenize()?;  // With semantic indentation
56//! let base = loader.base_tokens()?; // Core tokens only
57//! # Ok::<(), Box<dyn std::error::Error>>(())
58//! ```
59//!
60//! ## Custom Transform
61//!
62//! ```rust
63//! use lex_parser::lex::loader::DocumentLoader;
64//! use lex_parser::lex::transforms::standard::TO_IR;
65//!
66//! let loader = DocumentLoader::from_string("Hello\n");
67//! let ir = loader.with(&*TO_IR)?;  // Get intermediate representation
68//! # Ok::<(), Box<dyn std::error::Error>>(())
69//! ```
70//!
71//! ## Multiple Operations on Same Source
72//!
73//! ```rust
74//! use lex_parser::lex::loader::DocumentLoader;
75//!
76//! let loader = DocumentLoader::from_string("Hello\n");
77//! let source = loader.source();      // Get original text
78//! let tokens = loader.tokenize()?;   // Get tokens
79//! let doc = loader.parse()?;         // Get AST
80//! # Ok::<(), Box<dyn std::error::Error>>(())
81//! ```
82//!
83//! # Use Cases
84//!
85//! - CLI Tools: Load files and apply stage+format transforms
86//! - Tests: Load test fixtures and verify different processing stages
87//! - Library Code: Process lex documents programmatically
88//! - REPL/Interactive: Parse user input on-the-fly
89
90use crate::lex::parsing::Document;
91use crate::lex::transforms::standard::{TokenStream, CORE_TOKENIZATION, LEXING, STRING_TO_AST};
92use crate::lex::transforms::{Transform, TransformError};
93use std::fs;
94use std::path::Path;
95
/// Error that can occur when loading documents
///
/// Covers the two failure modes of [`DocumentLoader`]: reading source text
/// from disk and running a transform over it.
#[derive(Debug, Clone)]
pub enum LoaderError {
    /// IO error when reading file. Stored as the rendered message (`String`)
    /// rather than the `std::io::Error` itself, since `std::io::Error` is not
    /// `Clone` and this enum derives `Clone`.
    IoError(String),
    /// Transform/parsing error
    TransformError(TransformError),
}
104
105impl std::fmt::Display for LoaderError {
106    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
107        match self {
108            LoaderError::IoError(msg) => write!(f, "IO error: {msg}"),
109            LoaderError::TransformError(err) => write!(f, "Transform error: {err}"),
110        }
111    }
112}
113
114impl std::error::Error for LoaderError {}
115
116impl From<std::io::Error> for LoaderError {
117    fn from(err: std::io::Error) -> Self {
118        LoaderError::IoError(err.to_string())
119    }
120}
121
122impl From<TransformError> for LoaderError {
123    fn from(err: TransformError) -> Self {
124        LoaderError::TransformError(err)
125    }
126}
127
/// Document loader with transform shortcuts
///
/// `DocumentLoader` provides a convenient API for loading source text and running
/// transforms on it. It's used by both production code (CLI, libraries) and tests.
///
/// # Example
///
/// ```rust,no_run
/// use lex_parser::lex::loader::DocumentLoader;
///
/// // Load from file and parse
/// let doc = DocumentLoader::from_path("example.lex")
///     .unwrap()
///     .parse()
///     .unwrap();
///
/// // Load from string and get tokens
/// let tokens = DocumentLoader::from_string("Hello world\n")
///     .tokenize()
///     .unwrap();
/// ```
pub struct DocumentLoader {
    // Owned source text; shortcut methods clone it for each transform run,
    // which is what makes the loader reusable.
    source: String,
}
152
153impl DocumentLoader {
154    /// Load from a file path
155    ///
156    /// # Example
157    ///
158    /// ```rust
159    /// use lex_parser::lex::loader::DocumentLoader;
160    ///
161    /// let loader = DocumentLoader::from_path("example.lex").unwrap();
162    /// ```
163    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self, LoaderError> {
164        let source = fs::read_to_string(path)?;
165        Ok(DocumentLoader { source })
166    }
167
168    /// Load from a string
169    ///
170    /// # Example
171    ///
172    /// ```rust
173    /// use lex_parser::lex::loader::DocumentLoader;
174    ///
175    /// let loader = DocumentLoader::from_string("Hello world\n");
176    /// ```
177    pub fn from_string<S: Into<String>>(source: S) -> Self {
178        DocumentLoader {
179            source: source.into(),
180        }
181    }
182
183    /// Run a custom transform on the source
184    ///
185    /// This is the generic method that all shortcuts use internally.
186    ///
187    /// # Example
188    ///
189    /// ```rust
190    /// use lex_parser::lex::loader::DocumentLoader;
191    /// use lex_parser::lex::transforms::standard::LEXING;
192    ///
193    /// let loader = DocumentLoader::from_string("Hello\n");
194    /// let tokens = loader.with(&*LEXING).unwrap();
195    /// ```
196    pub fn with<O: 'static>(&self, transform: &Transform<String, O>) -> Result<O, LoaderError> {
197        Ok(transform.run(self.source.clone())?)
198    }
199
200    /// Parse the source into a Document AST
201    ///
202    /// This is a shortcut for `.with(&STRING_TO_AST)`.
203    ///
204    /// # Example
205    ///
206    /// ```rust
207    /// use lex_parser::lex::loader::DocumentLoader;
208    ///
209    /// let doc = DocumentLoader::from_string("Hello world\n")
210    ///     .parse()
211    ///     .unwrap();
212    /// ```
213    pub fn parse(&self) -> Result<Document, LoaderError> {
214        self.with(&STRING_TO_AST)
215    }
216
217    /// Tokenize the source with full lexing (including semantic indentation)
218    ///
219    /// This is a shortcut for `.with(&LEXING)`.
220    ///
221    /// # Example
222    ///
223    /// ```rust
224    /// use lex_parser::lex::loader::DocumentLoader;
225    ///
226    /// let tokens = DocumentLoader::from_string("Session:\n    Content\n")
227    ///     .tokenize()
228    ///     .unwrap();
229    /// // tokens include Indent/Dedent
230    /// ```
231    pub fn tokenize(&self) -> Result<TokenStream, LoaderError> {
232        self.with(&LEXING)
233    }
234
235    /// Get base tokens (core tokenization only, no semantic indentation)
236    ///
237    /// This is a shortcut for `.with(&CORE_TOKENIZATION)`.
238    ///
239    /// # Example
240    ///
241    /// ```rust
242    /// use lex_parser::lex::loader::DocumentLoader;
243    ///
244    /// let tokens = DocumentLoader::from_string("Hello\n")
245    ///     .base_tokens()
246    ///     .unwrap();
247    /// // tokens include raw Indentation tokens, not Indent/Dedent
248    /// ```
249    pub fn base_tokens(&self) -> Result<TokenStream, LoaderError> {
250        self.with(&CORE_TOKENIZATION)
251    }
252
253    /// Get the raw source string
254    ///
255    /// # Example
256    ///
257    /// ```rust
258    /// use lex_parser::lex::loader::DocumentLoader;
259    ///
260    /// let loader = DocumentLoader::from_string("Hello\n");
261    /// assert_eq!(loader.source(), "Hello\n");
262    /// ```
263    pub fn source(&self) -> String {
264        self.source.clone()
265    }
266
267    /// Get a reference to the raw source string
268    ///
269    /// Use this when you don't need an owned copy.
270    pub fn source_ref(&self) -> &str {
271        &self.source
272    }
273}
274
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lex::testing::workspace_path;
    use crate::lex::token::Token;

    #[test]
    fn test_from_string() {
        // Round-trip: the loader hands back exactly what it was given.
        let loaded = DocumentLoader::from_string("Hello world\n");
        assert_eq!(loaded.source(), "Hello world\n");
    }

    #[test]
    fn test_from_path() {
        let fixture =
            workspace_path("specs/v1/elements/paragraph.docs/paragraph-01-flat-oneline.lex");
        let loaded = DocumentLoader::from_path(fixture).unwrap();
        assert!(!loaded.source().is_empty());
    }

    #[test]
    fn test_from_path_nonexistent() {
        // Missing files surface as an Err, not a panic.
        assert!(DocumentLoader::from_path("nonexistent.lex").is_err());
    }

    #[test]
    fn test_parse() {
        let doc = DocumentLoader::from_string("Hello world\n").parse().unwrap();
        assert!(!doc.root.children.is_empty());
    }

    #[test]
    fn test_parse_with_session() {
        let doc = DocumentLoader::from_string("Session:\n    Content here\n")
            .parse()
            .unwrap();
        assert!(!doc.root.children.is_empty());
    }

    #[test]
    fn test_tokenize() {
        let stream = DocumentLoader::from_string("Session:\n    Content\n")
            .tokenize()
            .unwrap();

        // Full lexing emits semantic Indent/Dedent tokens.
        let saw_indent = stream.iter().any(|(tok, _)| matches!(tok, Token::Indent(_)));
        let saw_dedent = stream.iter().any(|(tok, _)| matches!(tok, Token::Dedent(_)));
        assert!(saw_indent);
        assert!(saw_dedent);
    }

    #[test]
    fn test_base_tokens() {
        let stream = DocumentLoader::from_string("Hello world\n")
            .base_tokens()
            .unwrap();

        assert!(!stream.is_empty());
        // Core tokenization never emits Indent (that comes from semantic indentation).
        let saw_indent = stream.iter().any(|(tok, _)| matches!(tok, Token::Indent(_)));
        assert!(!saw_indent);
    }

    #[test]
    fn test_base_tokens_has_indentation() {
        let stream = DocumentLoader::from_string("    Hello\n")
            .base_tokens()
            .unwrap();

        // Leading whitespace shows up as raw Indentation tokens.
        let saw_indentation = stream.iter().any(|(tok, _)| matches!(tok, Token::Indentation));
        assert!(saw_indentation);
    }

    #[test]
    fn test_source() {
        let loaded = DocumentLoader::from_string("Test content\n");
        assert_eq!(loaded.source(), "Test content\n");
    }

    #[test]
    fn test_with_custom_transform() {
        let stream = DocumentLoader::from_string("Hello\n")
            .with(&CORE_TOKENIZATION)
            .unwrap();
        assert!(!stream.is_empty());
    }

    #[test]
    fn test_loader_is_reusable() {
        let loaded = DocumentLoader::from_string("Hello\n");

        // The same loader supports multiple operations without being consumed.
        let _tokens = loaded.tokenize().unwrap();
        let _doc = loaded.parse().unwrap();
        let _source = loaded.source();
    }

    #[test]
    fn test_from_path_integration() {
        let fixture = workspace_path("specs/v1/benchmark/010-kitchensink.lex");
        let doc = DocumentLoader::from_path(fixture).unwrap().parse().unwrap();
        assert!(!doc.root.children.is_empty());
    }
}