lex_core/lex/transforms.rs
1//! Transform pipeline infrastructure
2//!
3//! This module provides a composable, type-safe transformation system that replaces
4//! the old rigid pipeline architecture. Any transform can be chained with another
5//! if their types are compatible, enabling modular and reusable processing stages.
6//!
7//! # Architecture Overview
8//!
9//! The transform system consists of three core concepts:
10//!
11//! ## 1. The `Runnable` Trait
12//!
13//! The fundamental interface for all transformation stages. Any type implementing
14//! `Runnable<I, O>` can transform input of type `I` to output of type `O`:
15//!
16//! ```rust,ignore
17//! pub trait Runnable<I, O> {
18//! fn run(&self, input: I) -> Result<O, TransformError>;
19//! }
20//! ```
21//!
22//! This trait is implemented by individual processing stages (tokenization, parsing, etc.).
23//!
24//! ## 2. The `Transform<I, O>` Type
25//!
26//! A wrapper that enables composition. Any `Runnable` can be converted to a `Transform`,
27//! which provides the `.then()` method for type-safe chaining:
28//!
29//! ```rust,ignore
30//! let pipeline = Transform::from_fn(|x| Ok(x))
31//! .then(Tokenize) // String → Vec<Token>
32//! .then(Parse); // Vec<Token> → Ast
33//! // Result: Transform<String, Ast>
34//! ```
35//!
36//! The compiler enforces that output types match input types at each stage.
37//!
38//! ## 3. Static Lazy Transforms
39//!
//! Common pipelines are pre-built as statics using `once_cell::sync::Lazy`, so each
//! standard processing path is constructed once, on first use, and then reused:
42//!
43//! ```rust,ignore
44//! pub static LEXING: Lazy<Transform<String, TokenStream>> = Lazy::new(|| {
45//! Transform::from_fn(Ok)
46//! .then(CoreTokenization::new())
47//! .then(SemanticIndentation::new())
48//! });
49//! ```
50//!
51//! See the [`standard`] module for all pre-built transforms.
52//!
53//! # Usage Patterns
54//!
55//! ## Direct Transform Usage
56//!
57//! For programmatic access to specific stages:
58//!
59//! ```rust
60//! use lex_parser::lex::transforms::standard::LEXING;
61//!
62//! let tokens = LEXING.run("Session:\n Content\n".to_string())?;
63//! # Ok::<(), Box<dyn std::error::Error>>(())
64//! ```
65//!
66//! ## With DocumentLoader
67//!
68//! For most use cases, use [`DocumentLoader`](crate::lex::loader::DocumentLoader)
69//! which provides convenient shortcuts:
70//!
71//! ```rust
72//! use lex_parser::lex::loader::DocumentLoader;
73//!
74//! let loader = DocumentLoader::from_string("Hello\n");
75//! let doc = loader.parse()?; // Full AST
76//! let tokens = loader.tokenize()?; // Lexed tokens
77//! # Ok::<(), Box<dyn std::error::Error>>(())
78//! ```
79//!
80//! ## Custom Pipelines
81//!
82//! Build custom processing chains for specialized needs:
83//!
84//! ```rust,ignore
85//! use lex_parser::lex::transforms::{Transform, standard::CORE_TOKENIZATION};
86//!
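//! // `then` consumes its receiver, so a static pipeline cannot be extended in place.
//! // Start a fresh pipeline and borrow the static through `then_transform` instead.
//! let custom = Transform::from_fn(Ok)
//!     .then_transform(&*CORE_TOKENIZATION)
//!     .then(MyCustomStage::new())
//!     .then(AnotherStage::new());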
90//!
91//! let result = custom.run(source)?;
92//! ```
93//!
94//! # Module Organization
95//!
96//! - [`stages`]: Individual transformation stages (tokenization, indentation, parsing)
97//! - [`standard`]: Pre-built transform combinations for common use cases
98//!
99//! # Design Benefits
100//!
101//! - Type Safety: Compiler verifies pipeline stage compatibility
102//! - Composability: Mix and match stages to create custom pipelines
103//! - Reusability: Share transforms across CLI, tests, and library code
104//! - Clarity: Explicit stage boundaries with clear input/output types
105//! - Testability: Test individual stages in isolation
106
107pub mod stages;
108pub mod standard;
109
110use std::fmt;
111
/// Error that can occur during transformation.
///
/// Both variants carry only `String` data, so equality is total and `Eq` is
/// derived alongside `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TransformError {
    /// Generic error carrying only a message.
    Error(String),
    /// A named stage failed with a specific error.
    StageFailed {
        /// Name of the stage that failed.
        stage: String,
        /// Description of what went wrong inside that stage.
        message: String,
    },
}
120
121impl fmt::Display for TransformError {
122 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
123 match self {
124 TransformError::Error(msg) => write!(f, "{msg}"),
125 TransformError::StageFailed { stage, message } => {
126 write!(f, "Stage '{stage}' failed: {message}")
127 }
128 }
129 }
130}
131
// Marker impl with an empty body: the trait's default methods are used as-is.
// Implementing `std::error::Error` is what lets a `TransformError` be converted
// into `Box<dyn std::error::Error>` (as the module-level examples do with `?`).
impl std::error::Error for TransformError {}
133
134impl From<String> for TransformError {
135 fn from(s: String) -> Self {
136 TransformError::Error(s)
137 }
138}
139
140impl From<&str> for TransformError {
141 fn from(s: &str) -> Self {
142 TransformError::Error(s.to_string())
143 }
144}
145
/// Trait for anything that can transform an input to an output.
///
/// This is implemented by individual transformation stages, and by
/// [`Transform`] itself, which composes multiple `Runnable` implementations
/// into a single pipeline.
///
/// # Type Parameters
///
/// - `I`: the input type, taken by value.
/// - `O`: the output type produced on success.
pub trait Runnable<I, O> {
    /// Execute this transformation on the input.
    ///
    /// # Errors
    ///
    /// Returns a [`TransformError`] when the stage cannot produce an output.
    fn run(&self, input: I) -> Result<O, TransformError>;
}
154
/// A composable transformation pipeline
///
/// `Transform<I, O>` represents a transformation from type `I` to type `O`.
/// It can be extended with a further stage using the `then` method, or with
/// another pre-built transform using the `then_transform` method.
///
/// # Type Safety
///
/// The type system ensures that transforms can only be composed if their
/// input/output types are compatible. For example:
///
/// ```rust,compile_fail
/// let t1: Transform<String, TokenStream> = ...;
/// let t2: &'static Transform<Document, String> = ...;
///
/// // This will fail to compile - TokenStream != Document
/// let bad = t1.then_transform(t2);
/// ```
pub struct Transform<I, O> {
    // The whole pipeline collapsed into one boxed closure. Each composition step
    // wraps the previous closure in a new one. The `Send + Sync` bounds are
    // required for a `Transform` to be shareable between threads.
    run_fn: Box<dyn Fn(I) -> Result<O, TransformError> + Send + Sync>,
}
175
176impl<I, O> Transform<I, O> {
177 /// Create a new identity transform that passes input through unchanged
178 ///
179 /// Note: This only works when `I = O`
180 pub fn identity() -> Self
181 where
182 I: Clone + 'static,
183 O: From<I> + 'static,
184 {
185 Transform {
186 run_fn: Box::new(|input| Ok(O::from(input))),
187 }
188 }
189
190 /// Create a transform from a function
191 pub fn from_fn<F>(f: F) -> Self
192 where
193 F: Fn(I) -> Result<O, TransformError> + Send + Sync + 'static,
194 {
195 Transform {
196 run_fn: Box::new(f),
197 }
198 }
199
200 /// Add a stage to this transform, returning a new transform with extended output type
201 ///
202 /// This is the core composition method. It chains this transform's output into
203 /// the next stage's input, creating a new transform from `I` to `O2`.
204 ///
205 /// # Type Safety
206 ///
207 /// The compiler ensures that the stage's input type matches this transform's
208 /// output type.
209 ///
210 /// # Example
211 ///
212 /// ```rust
213 /// let t1: Transform<String, Tokens> = ...;
214 /// let t2: Transform<Tokens, Ast> = ...;
215 ///
216 /// let combined: Transform<String, Ast> = t1.then_transform(&t2);
217 /// ```
218 pub fn then<O2, S>(self, stage: S) -> Transform<I, O2>
219 where
220 S: Runnable<O, O2> + Send + Sync + 'static,
221 I: 'static,
222 O: 'static,
223 O2: 'static,
224 {
225 let prev_run = self.run_fn;
226 Transform {
227 run_fn: Box::new(move |input| {
228 let intermediate = prev_run(input)?;
229 stage.run(intermediate)
230 }),
231 }
232 }
233
234 /// Chain another transform to this transform
235 ///
236 /// This is similar to `then` but takes a `Transform` instead of a `Runnable`.
237 /// Useful for composing pre-built transform pipelines.
238 ///
239 /// The referenced transform must have a static lifetime (typically created with `lazy_static!`).
240 pub fn then_transform<O2>(self, next: &'static Transform<O, O2>) -> Transform<I, O2>
241 where
242 I: 'static,
243 O: 'static,
244 O2: 'static,
245 {
246 let prev_run = self.run_fn;
247 Transform {
248 run_fn: Box::new(move |input| {
249 let intermediate = prev_run(input)?;
250 next.run(intermediate)
251 }),
252 }
253 }
254
255 /// Execute this transform on the given input
256 pub fn run(&self, input: I) -> Result<O, TransformError> {
257 (self.run_fn)(input)
258 }
259}
260
261// Implement Runnable for Transform so transforms can be used as stages
262impl<I, O> Runnable<I, O> for Transform<I, O>
263where
264 I: 'static,
265 O: 'static,
266{
267 fn run(&self, input: I) -> Result<O, TransformError> {
268 Transform::run(self, input)
269 }
270}
271
272// Helper for creating transforms from closures
273impl<I, O> Transform<I, O> {
274 /// Create a new transform with no stages (useful as a starting point for composition)
275 pub fn new<F>(f: F) -> Self
276 where
277 F: Fn(I) -> Result<O, TransformError> + Send + Sync + 'static,
278 {
279 Transform::from_fn(f)
280 }
281}
282
#[cfg(test)]
mod tests {
    use super::*;

    // Test helpers - simple stages for composition

    /// Stage that multiplies its input by two.
    struct DoubleNumber;
    impl Runnable<i32, i32> for DoubleNumber {
        fn run(&self, input: i32) -> Result<i32, TransformError> {
            Ok(input * 2)
        }
    }

    /// Stage that adds ten to its input.
    struct AddTen;
    impl Runnable<i32, i32> for AddTen {
        fn run(&self, input: i32) -> Result<i32, TransformError> {
            Ok(input + 10)
        }
    }

    /// Stage that changes the value type from `i32` to `String`.
    struct IntToString;
    impl Runnable<i32, String> for IntToString {
        fn run(&self, input: i32) -> Result<String, TransformError> {
            Ok(input.to_string())
        }
    }

    /// Stage that always fails, used to exercise error propagation.
    struct FailingStage;
    impl Runnable<i32, i32> for FailingStage {
        fn run(&self, _input: i32) -> Result<i32, TransformError> {
            Err(TransformError::Error("intentional failure".to_string()))
        }
    }

    #[test]
    fn test_transform_from_fn() {
        let transform = Transform::from_fn(|x: i32| Ok(x * 2));
        assert_eq!(transform.run(5).unwrap(), 10);
    }

    #[test]
    fn test_transform_new_matches_from_fn() {
        let via_new = Transform::new(|x: i32| Ok(x * 2));
        let via_from_fn = Transform::from_fn(|x: i32| Ok(x * 2));
        assert_eq!(via_new.run(5), via_from_fn.run(5));
    }

    #[test]
    fn test_identity_passthrough() {
        let transform: Transform<i32, i32> = Transform::identity();
        assert_eq!(transform.run(5).unwrap(), 5);
    }

    #[test]
    fn test_single_stage() {
        let transform = Transform::from_fn(|x: i32| Ok(x)).then(DoubleNumber);
        assert_eq!(transform.run(5).unwrap(), 10);
    }

    #[test]
    fn test_multiple_same_type_stages() {
        let transform = Transform::from_fn(|x: i32| Ok(x))
            .then(DoubleNumber)
            .then(AddTen)
            .then(DoubleNumber);

        // (5 * 2) + 10 = 20, then 20 * 2 = 40
        assert_eq!(transform.run(5).unwrap(), 40);
    }

    #[test]
    fn test_type_changing_stage() {
        let transform = Transform::from_fn(|x: i32| Ok(x))
            .then(DoubleNumber)
            .then(IntToString);

        assert_eq!(transform.run(5).unwrap(), "10");
    }

    #[test]
    fn test_error_propagation() {
        let transform = Transform::from_fn(|x: i32| Ok(x))
            .then(DoubleNumber)
            .then(FailingStage)
            .then(AddTen);

        let result = transform.run(5);
        assert!(result.is_err());
        assert_eq!(
            result.unwrap_err(),
            TransformError::Error("intentional failure".to_string())
        );
    }

    #[test]
    fn test_transform_composition() {
        // Build sub-transforms
        let double_and_add = Transform::from_fn(|x: i32| Ok(x))
            .then(DoubleNumber)
            .then(AddTen);

        let to_string = Transform::from_fn(|x: i32| Ok(x)).then(IntToString);

        // Each sub-transform works on its own.
        assert_eq!(double_and_add.run(5).unwrap(), 20);
        assert_eq!(to_string.run(5).unwrap(), "5");

        // `Transform` implements `Runnable`, so an owned transform can be used
        // directly as the next stage: (5 * 2) + 10 = 20, rendered as "20".
        let combined = double_and_add.then(to_string);
        assert_eq!(combined.run(5).unwrap(), "20");
    }

    #[test]
    fn test_then_transform_with_static_ref() {
        // `then_transform` needs a `&'static Transform`; leaking a boxed transform
        // provides one without declaring a global for the test.
        let to_string: &'static Transform<i32, String> =
            Box::leak(Box::new(Transform::from_fn(|x: i32| Ok(x.to_string()))));

        let pipeline = Transform::from_fn(|x: i32| Ok(x))
            .then(DoubleNumber)
            .then_transform(to_string);

        assert_eq!(pipeline.run(5).unwrap(), "10");
    }

    #[test]
    fn test_then_transform_propagates_upstream_error() {
        let to_string: &'static Transform<i32, String> =
            Box::leak(Box::new(Transform::from_fn(|x: i32| Ok(x.to_string()))));

        let pipeline = Transform::from_fn(|x: i32| Ok(x))
            .then(FailingStage)
            .then_transform(to_string);

        assert_eq!(
            pipeline.run(5).unwrap_err(),
            TransformError::Error("intentional failure".to_string())
        );
    }

    #[test]
    fn test_error_display() {
        let err = TransformError::Error("test error".to_string());
        assert_eq!(format!("{err}"), "test error");

        let stage_err = TransformError::StageFailed {
            stage: "tokenization".to_string(),
            message: "invalid token".to_string(),
        };
        assert_eq!(
            format!("{stage_err}"),
            "Stage 'tokenization' failed: invalid token"
        );
    }

    #[test]
    fn test_error_conversion() {
        let err1: TransformError = "string error".into();
        assert_eq!(err1, TransformError::Error("string error".to_string()));

        let err2: TransformError = "owned string".to_string().into();
        assert_eq!(err2, TransformError::Error("owned string".to_string()));
    }
}