// lex_core/lex/loader.rs
1//! Document loading and transform execution
2//!
3//! This module provides [`DocumentLoader`] - the universal API for loading and processing
4//! lex source text. It abstracts over input sources (file vs. string) and provides convenient
5//! shortcuts for common transform operations.
6//!
7//! # Purpose
8//!
9//! `DocumentLoader` serves as the primary entry point for most lex processing tasks:
10//!
11//! - Universal Input: Load from files or strings with the same API
12//! - Transform Shortcuts: Common operations (`.parse()`, `.tokenize()`) built-in
13//! - Custom Transforms: Execute any transform via `.with()`
14//! - Source Access: Retrieve original source text via `.source()`
15//! - Reusable: Create once, run multiple transforms on the same source
16//!
17//! # Relationship to Transform System
18//!
19//! `DocumentLoader` is a convenience layer on top of the [transform system](crate::lex::transforms).
20//! It manages source text loading and delegates to the appropriate transform:
21//!
22//! - `.parse()` → Uses [`STRING_TO_AST`]
23//! - `.tokenize()` → Uses [`LEXING`]
24//! - `.base_tokens()` → Uses [`CORE_TOKENIZATION`]
25//! - `.with(transform)` → Runs any custom transform
26//!
27//! # Common Usage Patterns
28//!
29//! ## Parse a Document
30//!
31//! ```rust
32//! use lex_parser::lex::loader::DocumentLoader;
33//!
34//! let loader = DocumentLoader::from_string("Session:\n Content\n");
35//! let doc = loader.parse()?;
36//! # Ok::<(), Box<dyn std::error::Error>>(())
37//! ```
38//!
39//! ## Load from File
40//!
41//! ```rust,no_run
42//! use lex_parser::lex::loader::DocumentLoader;
43//!
44//! let loader = DocumentLoader::from_path("document.lex")?;
45//! let doc = loader.parse()?;
46//! # Ok::<(), Box<dyn std::error::Error>>(())
47//! ```
48//!
49//! ## Get Tokens
50//!
51//! ```rust
52//! use lex_parser::lex::loader::DocumentLoader;
53//!
54//! let loader = DocumentLoader::from_string("Hello world\n");
55//! let tokens = loader.tokenize()?; // With semantic indentation
56//! let base = loader.base_tokens()?; // Core tokens only
57//! # Ok::<(), Box<dyn std::error::Error>>(())
58//! ```
59//!
60//! ## Custom Transform
61//!
62//! ```rust
63//! use lex_parser::lex::loader::DocumentLoader;
64//! use lex_parser::lex::transforms::standard::TO_IR;
65//!
66//! let loader = DocumentLoader::from_string("Hello\n");
67//! let ir = loader.with(&*TO_IR)?; // Get intermediate representation
68//! # Ok::<(), Box<dyn std::error::Error>>(())
69//! ```
70//!
71//! ## Multiple Operations on Same Source
72//!
73//! ```rust
74//! use lex_parser::lex::loader::DocumentLoader;
75//!
76//! let loader = DocumentLoader::from_string("Hello\n");
77//! let source = loader.source(); // Get original text
78//! let tokens = loader.tokenize()?; // Get tokens
79//! let doc = loader.parse()?; // Get AST
80//! # Ok::<(), Box<dyn std::error::Error>>(())
81//! ```
82//!
83//! # Use Cases
84//!
85//! - CLI Tools: Load files and apply stage+format transforms
86//! - Tests: Load test fixtures and verify different processing stages
87//! - Library Code: Process lex documents programmatically
88//! - REPL/Interactive: Parse user input on-the-fly
89
90use crate::lex::parsing::Document;
91use crate::lex::transforms::standard::{TokenStream, CORE_TOKENIZATION, LEXING, STRING_TO_AST};
92use crate::lex::transforms::{Transform, TransformError};
93use std::fs;
94use std::path::Path;
95
/// Error that can occur when loading documents
///
/// `Clone` is derivable because the IO variant stores the error's message
/// text rather than `std::io::Error` itself (which does not implement
/// `Clone`); see the `From<std::io::Error>` impl below.
#[derive(Debug, Clone)]
pub enum LoaderError {
    /// IO error when reading file
    /// (holds the stringified `std::io::Error` message)
    IoError(String),
    /// Transform/parsing error
    /// (wraps the underlying [`TransformError`] unchanged)
    TransformError(TransformError),
}
104
105impl std::fmt::Display for LoaderError {
106 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
107 match self {
108 LoaderError::IoError(msg) => write!(f, "IO error: {msg}"),
109 LoaderError::TransformError(err) => write!(f, "Transform error: {err}"),
110 }
111 }
112}
113
// Marker impl: `Debug` + `Display` (above) satisfy the trait's requirements.
// No `source()` override is provided, so wrapped causes surface only through
// the formatted message text.
impl std::error::Error for LoaderError {}
115
116impl From<std::io::Error> for LoaderError {
117 fn from(err: std::io::Error) -> Self {
118 LoaderError::IoError(err.to_string())
119 }
120}
121
122impl From<TransformError> for LoaderError {
123 fn from(err: TransformError) -> Self {
124 LoaderError::TransformError(err)
125 }
126}
127
/// Document loader with transform shortcuts
///
/// `DocumentLoader` provides a convenient API for loading source text and running
/// transforms on it. It's used by both production code (CLI, libraries) and tests.
///
/// The loader owns its source text, so it can be created once and queried
/// repeatedly with different transforms.
///
/// # Example
///
/// ```rust,no_run
/// use lex_parser::lex::loader::DocumentLoader;
///
/// // Load from file and parse
/// let doc = DocumentLoader::from_path("example.lex")
///     .unwrap()
///     .parse()
///     .unwrap();
///
/// // Load from string and get tokens
/// let tokens = DocumentLoader::from_string("Hello world\n")
///     .tokenize()
///     .unwrap();
/// ```
#[derive(Debug, Clone)]
pub struct DocumentLoader {
    /// The raw source text that every transform operates on.
    source: String,
}
152
153impl DocumentLoader {
154 /// Load from a file path
155 ///
156 /// # Example
157 ///
158 /// ```rust
159 /// use lex_parser::lex::loader::DocumentLoader;
160 ///
161 /// let loader = DocumentLoader::from_path("example.lex").unwrap();
162 /// ```
163 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self, LoaderError> {
164 let source = fs::read_to_string(path)?;
165 Ok(DocumentLoader { source })
166 }
167
168 /// Load from a string
169 ///
170 /// # Example
171 ///
172 /// ```rust
173 /// use lex_parser::lex::loader::DocumentLoader;
174 ///
175 /// let loader = DocumentLoader::from_string("Hello world\n");
176 /// ```
177 pub fn from_string<S: Into<String>>(source: S) -> Self {
178 DocumentLoader {
179 source: source.into(),
180 }
181 }
182
183 /// Run a custom transform on the source
184 ///
185 /// This is the generic method that all shortcuts use internally.
186 ///
187 /// # Example
188 ///
189 /// ```rust
190 /// use lex_parser::lex::loader::DocumentLoader;
191 /// use lex_parser::lex::transforms::standard::LEXING;
192 ///
193 /// let loader = DocumentLoader::from_string("Hello\n");
194 /// let tokens = loader.with(&*LEXING).unwrap();
195 /// ```
196 pub fn with<O: 'static>(&self, transform: &Transform<String, O>) -> Result<O, LoaderError> {
197 Ok(transform.run(self.source.clone())?)
198 }
199
200 /// Parse the source into a Document AST
201 ///
202 /// This is a shortcut for `.with(&STRING_TO_AST)`.
203 ///
204 /// # Example
205 ///
206 /// ```rust
207 /// use lex_parser::lex::loader::DocumentLoader;
208 ///
209 /// let doc = DocumentLoader::from_string("Hello world\n")
210 /// .parse()
211 /// .unwrap();
212 /// ```
213 pub fn parse(&self) -> Result<Document, LoaderError> {
214 self.with(&STRING_TO_AST)
215 }
216
217 /// Tokenize the source with full lexing (including semantic indentation)
218 ///
219 /// This is a shortcut for `.with(&LEXING)`.
220 ///
221 /// # Example
222 ///
223 /// ```rust
224 /// use lex_parser::lex::loader::DocumentLoader;
225 ///
226 /// let tokens = DocumentLoader::from_string("Session:\n Content\n")
227 /// .tokenize()
228 /// .unwrap();
229 /// // tokens include Indent/Dedent
230 /// ```
231 pub fn tokenize(&self) -> Result<TokenStream, LoaderError> {
232 self.with(&LEXING)
233 }
234
235 /// Get base tokens (core tokenization only, no semantic indentation)
236 ///
237 /// This is a shortcut for `.with(&CORE_TOKENIZATION)`.
238 ///
239 /// # Example
240 ///
241 /// ```rust
242 /// use lex_parser::lex::loader::DocumentLoader;
243 ///
244 /// let tokens = DocumentLoader::from_string("Hello\n")
245 /// .base_tokens()
246 /// .unwrap();
247 /// // tokens include raw Indentation tokens, not Indent/Dedent
248 /// ```
249 pub fn base_tokens(&self) -> Result<TokenStream, LoaderError> {
250 self.with(&CORE_TOKENIZATION)
251 }
252
253 /// Get the raw source string
254 ///
255 /// # Example
256 ///
257 /// ```rust
258 /// use lex_parser::lex::loader::DocumentLoader;
259 ///
260 /// let loader = DocumentLoader::from_string("Hello\n");
261 /// assert_eq!(loader.source(), "Hello\n");
262 /// ```
263 pub fn source(&self) -> String {
264 self.source.clone()
265 }
266
267 /// Get a reference to the raw source string
268 ///
269 /// Use this when you don't need an owned copy.
270 pub fn source_ref(&self) -> &str {
271 &self.source
272 }
273}
274
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lex::testing::workspace_path;
    use crate::lex::token::Token;

    #[test]
    fn test_from_string() {
        // Round-trip: the loader hands back exactly what it was given.
        assert_eq!(
            DocumentLoader::from_string("Hello world\n").source(),
            "Hello world\n"
        );
    }

    #[test]
    fn test_from_path() {
        let fixture =
            workspace_path("comms/specs/elements/paragraph.docs/paragraph-01-flat-oneline.lex");
        let loaded = DocumentLoader::from_path(fixture).unwrap();
        assert!(!loaded.source().is_empty());
    }

    #[test]
    fn test_from_path_nonexistent() {
        assert!(DocumentLoader::from_path("nonexistent.lex").is_err());
    }

    #[test]
    fn test_parse() {
        let doc = DocumentLoader::from_string("Hello world\n").parse().unwrap();
        assert!(!doc.root.children.is_empty());
    }

    #[test]
    fn test_parse_with_session() {
        let doc = DocumentLoader::from_string("Session:\n Content here\n")
            .parse()
            .unwrap();
        assert!(!doc.root.children.is_empty());
    }

    #[test]
    fn test_tokenize() {
        let tokens = DocumentLoader::from_string("Session:\n Content\n")
            .tokenize()
            .unwrap();

        // Full lexing inserts semantic Indent/Dedent tokens.
        let saw_indent = tokens.iter().any(|(t, _)| matches!(t, Token::Indent(_)));
        let saw_dedent = tokens.iter().any(|(t, _)| matches!(t, Token::Dedent(_)));
        assert!(saw_indent);
        assert!(saw_dedent);
    }

    #[test]
    fn test_base_tokens() {
        let tokens = DocumentLoader::from_string("Hello world\n")
            .base_tokens()
            .unwrap();

        assert!(!tokens.is_empty());
        // Core tokenization never synthesizes Indent/Dedent tokens.
        let saw_indent = tokens.iter().any(|(t, _)| matches!(t, Token::Indent(_)));
        assert!(!saw_indent);
    }

    #[test]
    fn test_base_tokens_has_indentation() {
        let tokens = DocumentLoader::from_string(" Hello\n")
            .base_tokens()
            .unwrap();

        // Leading whitespace surfaces as raw Indentation tokens.
        assert!(tokens.iter().any(|(t, _)| matches!(t, Token::Indentation)));
    }

    #[test]
    fn test_source() {
        assert_eq!(
            DocumentLoader::from_string("Test content\n").source(),
            "Test content\n"
        );
    }

    #[test]
    fn test_with_custom_transform() {
        let tokens = DocumentLoader::from_string("Hello\n")
            .with(&CORE_TOKENIZATION)
            .unwrap();
        assert!(!tokens.is_empty());
    }

    #[test]
    fn test_loader_is_reusable() {
        let loader = DocumentLoader::from_string("Hello\n");

        // The same loader services several independent requests.
        let _ = loader.tokenize().unwrap();
        let _ = loader.parse().unwrap();
        let _ = loader.source();
    }

    #[test]
    fn test_from_path_integration() {
        let loaded =
            DocumentLoader::from_path(workspace_path("comms/specs/benchmark/010-kitchensink.lex"))
                .unwrap();
        assert!(!loaded.parse().unwrap().root.children.is_empty());
    }
}
377}