lex_core/lex/
transforms.rs

1//! Transform pipeline infrastructure
2//!
3//! This module provides a composable, type-safe transformation system that replaces
4//! the old rigid pipeline architecture. Any transform can be chained with another
5//! if their types are compatible, enabling modular and reusable processing stages.
6//!
7//! # Architecture Overview
8//!
9//! The transform system consists of three core concepts:
10//!
11//! ## 1. The `Runnable` Trait
12//!
13//! The fundamental interface for all transformation stages. Any type implementing
14//! `Runnable<I, O>` can transform input of type `I` to output of type `O`:
15//!
16//! ```rust,ignore
17//! pub trait Runnable<I, O> {
18//!     fn run(&self, input: I) -> Result<O, TransformError>;
19//! }
20//! ```
21//!
22//! This trait is implemented by individual processing stages (tokenization, parsing, etc.).
23//!
24//! ## 2. The `Transform<I, O>` Type
25//!
26//! A wrapper that enables composition. Any `Runnable` can be converted to a `Transform`,
27//! which provides the `.then()` method for type-safe chaining:
28//!
29//! ```rust,ignore
30//! let pipeline = Transform::from_fn(|x| Ok(x))
31//!     .then(Tokenize)   // String → Vec<Token>
32//!     .then(Parse);     // Vec<Token> → Ast
33//! // Result: Transform<String, Ast>
34//! ```
35//!
36//! The compiler enforces that output types match input types at each stage.
37//!
38//! ## 3. Static Lazy Transforms
39//!
//! Common pipelines are pre-built as static references using `once_cell::sync::Lazy`.
//! Each pipeline is constructed once, on first access, and shared afterwards, so the
//! standard processing paths are not rebuilt on every call:
42//!
43//! ```rust,ignore
44//! pub static LEXING: Lazy<Transform<String, TokenStream>> = Lazy::new(|| {
45//!     Transform::from_fn(Ok)
46//!         .then(CoreTokenization::new())
47//!         .then(SemanticIndentation::new())
48//! });
49//! ```
50//!
51//! See the [`standard`] module for all pre-built transforms.
52//!
53//! # Usage Patterns
54//!
55//! ## Direct Transform Usage
56//!
57//! For programmatic access to specific stages:
58//!
59//! ```rust
60//! use lex_parser::lex::transforms::standard::LEXING;
61//!
62//! let tokens = LEXING.run("Session:\n    Content\n".to_string())?;
63//! # Ok::<(), Box<dyn std::error::Error>>(())
64//! ```
65//!
66//! ## With DocumentLoader
67//!
68//! For most use cases, use [`DocumentLoader`](crate::lex::loader::DocumentLoader)
69//! which provides convenient shortcuts:
70//!
71//! ```rust
72//! use lex_parser::lex::loader::DocumentLoader;
73//!
74//! let loader = DocumentLoader::from_string("Hello\n");
75//! let doc = loader.parse()?;          // Full AST
76//! let tokens = loader.tokenize()?;     // Lexed tokens
77//! # Ok::<(), Box<dyn std::error::Error>>(())
78//! ```
79//!
80//! ## Custom Pipelines
81//!
82//! Build custom processing chains for specialized needs:
83//!
84//! ```rust,ignore
85//! use lex_parser::lex::transforms::{Transform, standard::CORE_TOKENIZATION};
86//!
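//! // `then` consumes its receiver, so a static pipeline is chained by reference
//! // through `then_transform` rather than by calling `.then()` on the static itself.
//! let custom = Transform::from_fn(Ok)
//!     .then_transform(&*CORE_TOKENIZATION)
//!     .then(MyCustomStage::new())
//!     .then(AnotherStage::new());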
90//!
91//! let result = custom.run(source)?;
92//! ```
93//!
94//! # Module Organization
95//!
96//! - [`stages`]: Individual transformation stages (tokenization, indentation, parsing)
97//! - [`standard`]: Pre-built transform combinations for common use cases
98//!
99//! # Design Benefits
100//!
101//! - Type Safety: Compiler verifies pipeline stage compatibility
102//! - Composability: Mix and match stages to create custom pipelines
103//! - Reusability: Share transforms across CLI, tests, and library code
104//! - Clarity: Explicit stage boundaries with clear input/output types
105//! - Testability: Test individual stages in isolation
106
107pub mod stages;
108pub mod standard;
109
110use std::fmt;
111
/// Failure reported by a transformation stage or pipeline.
///
/// Both variants carry only owned strings, so the type is `Clone` and supports
/// full equality comparison (`Eq`), which is convenient when asserting on
/// errors in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TransformError {
    /// Free-form failure described only by a message.
    Error(String),
    /// Failure attributed to a named stage.
    StageFailed {
        /// Name of the stage that failed.
        stage: String,
        /// Description of what went wrong.
        message: String,
    },
}
120
121impl fmt::Display for TransformError {
122    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
123        match self {
124            TransformError::Error(msg) => write!(f, "{msg}"),
125            TransformError::StageFailed { stage, message } => {
126                write!(f, "Stage '{stage}' failed: {message}")
127            }
128        }
129    }
130}
131
132impl std::error::Error for TransformError {}
133
134impl From<String> for TransformError {
135    fn from(s: String) -> Self {
136        TransformError::Error(s)
137    }
138}
139
140impl From<&str> for TransformError {
141    fn from(s: &str) -> Self {
142        TransformError::Error(s.to_string())
143    }
144}
145
146/// Trait for anything that can transform an input to an output
147///
148/// This is implemented by individual transformation stages.
149/// The `Transform` struct composes multiple `Runnable` implementations.
150pub trait Runnable<I, O> {
151    /// Execute this transformation on the input
152    fn run(&self, input: I) -> Result<O, TransformError>;
153}
154
155/// A composable transformation pipeline
156///
157/// `Transform<I, O>` represents a transformation from type `I` to type `O`.
158/// It can be composed with other transforms using the `add` method.
159///
160/// # Type Safety
161///
162/// The type system ensures that transforms can only be composed if their
163/// input/output types are compatible. For example:
164///
165/// ```rust,compile_fail
166/// let t1: Transform<String, TokenStream> = ...;
167/// let t2: Transform<Document, String> = ...;
168///
169/// // This will fail to compile - TokenStream != Document
170/// let bad = t1.add_transform(&t2);
171/// ```
172pub struct Transform<I, O> {
173    run_fn: Box<dyn Fn(I) -> Result<O, TransformError> + Send + Sync>,
174}
175
176impl<I, O> Transform<I, O> {
177    /// Create a new identity transform that passes input through unchanged
178    ///
179    /// Note: This only works when `I = O`
180    pub fn identity() -> Self
181    where
182        I: Clone + 'static,
183        O: From<I> + 'static,
184    {
185        Transform {
186            run_fn: Box::new(|input| Ok(O::from(input))),
187        }
188    }
189
190    /// Create a transform from a function
191    pub fn from_fn<F>(f: F) -> Self
192    where
193        F: Fn(I) -> Result<O, TransformError> + Send + Sync + 'static,
194    {
195        Transform {
196            run_fn: Box::new(f),
197        }
198    }
199
200    /// Add a stage to this transform, returning a new transform with extended output type
201    ///
202    /// This is the core composition method. It chains this transform's output into
203    /// the next stage's input, creating a new transform from `I` to `O2`.
204    ///
205    /// # Type Safety
206    ///
207    /// The compiler ensures that the stage's input type matches this transform's
208    /// output type.
209    ///
210    /// # Example
211    ///
212    /// ```rust
213    /// let t1: Transform<String, Tokens> = ...;
214    /// let t2: Transform<Tokens, Ast> = ...;
215    ///
216    /// let combined: Transform<String, Ast> = t1.then_transform(&t2);
217    /// ```
218    pub fn then<O2, S>(self, stage: S) -> Transform<I, O2>
219    where
220        S: Runnable<O, O2> + Send + Sync + 'static,
221        I: 'static,
222        O: 'static,
223        O2: 'static,
224    {
225        let prev_run = self.run_fn;
226        Transform {
227            run_fn: Box::new(move |input| {
228                let intermediate = prev_run(input)?;
229                stage.run(intermediate)
230            }),
231        }
232    }
233
234    /// Chain another transform to this transform
235    ///
236    /// This is similar to `then` but takes a `Transform` instead of a `Runnable`.
237    /// Useful for composing pre-built transform pipelines.
238    ///
239    /// The referenced transform must have a static lifetime (typically created with `lazy_static!`).
240    pub fn then_transform<O2>(self, next: &'static Transform<O, O2>) -> Transform<I, O2>
241    where
242        I: 'static,
243        O: 'static,
244        O2: 'static,
245    {
246        let prev_run = self.run_fn;
247        Transform {
248            run_fn: Box::new(move |input| {
249                let intermediate = prev_run(input)?;
250                next.run(intermediate)
251            }),
252        }
253    }
254
255    /// Execute this transform on the given input
256    pub fn run(&self, input: I) -> Result<O, TransformError> {
257        (self.run_fn)(input)
258    }
259}
260
261// Implement Runnable for Transform so transforms can be used as stages
262impl<I, O> Runnable<I, O> for Transform<I, O>
263where
264    I: 'static,
265    O: 'static,
266{
267    fn run(&self, input: I) -> Result<O, TransformError> {
268        Transform::run(self, input)
269    }
270}
271
272// Helper for creating transforms from closures
273impl<I, O> Transform<I, O> {
274    /// Create a new transform with no stages (useful as a starting point for composition)
275    pub fn new<F>(f: F) -> Self
276    where
277        F: Fn(I) -> Result<O, TransformError> + Send + Sync + 'static,
278    {
279        Transform::from_fn(f)
280    }
281}
282
#[cfg(test)]
mod tests {
    use super::*;

    // Test helpers - minimal stages used to exercise composition.

    /// Multiplies its input by two.
    struct DoubleNumber;
    impl Runnable<i32, i32> for DoubleNumber {
        fn run(&self, input: i32) -> Result<i32, TransformError> {
            Ok(input * 2)
        }
    }

    /// Adds ten to its input.
    struct AddTen;
    impl Runnable<i32, i32> for AddTen {
        fn run(&self, input: i32) -> Result<i32, TransformError> {
            Ok(input + 10)
        }
    }

    /// Changes the value type, to exercise type-changing pipelines.
    struct IntToString;
    impl Runnable<i32, String> for IntToString {
        fn run(&self, input: i32) -> Result<String, TransformError> {
            Ok(input.to_string())
        }
    }

    /// Always fails, to exercise error propagation.
    struct FailingStage;
    impl Runnable<i32, i32> for FailingStage {
        fn run(&self, _input: i32) -> Result<i32, TransformError> {
            Err(TransformError::Error("intentional failure".to_string()))
        }
    }

    /// Produces a `&'static Transform`, as required by `then_transform`.
    ///
    /// Leaking is acceptable here: the allocation lives only for the test run.
    fn leaked_int_to_string() -> &'static Transform<i32, String> {
        Box::leak(Box::new(
            Transform::from_fn(|x: i32| Ok(x)).then(IntToString),
        ))
    }

    #[test]
    fn test_transform_from_fn() {
        let transform = Transform::from_fn(|x: i32| Ok(x * 2));
        assert_eq!(transform.run(5).unwrap(), 10);
    }

    #[test]
    fn test_new_behaves_like_from_fn() {
        let transform = Transform::new(|x: i32| Ok(x + 1)).then(DoubleNumber);
        assert_eq!(transform.run(4).unwrap(), 10);
    }

    #[test]
    fn test_identity() {
        let same: Transform<i32, i32> = Transform::identity();
        assert_eq!(same.run(7).unwrap(), 7);

        // `identity` converts through `From`, so widening conversions work too.
        let widening: Transform<i32, i64> = Transform::identity();
        assert_eq!(widening.run(7).unwrap(), 7_i64);
    }

    #[test]
    fn test_single_stage() {
        let transform = Transform::from_fn(|x: i32| Ok(x)).then(DoubleNumber);
        assert_eq!(transform.run(5).unwrap(), 10);
    }

    #[test]
    fn test_multiple_same_type_stages() {
        let transform = Transform::from_fn(|x: i32| Ok(x))
            .then(DoubleNumber)
            .then(AddTen)
            .then(DoubleNumber);

        // (5 * 2) + 10 = 20, then 20 * 2 = 40
        assert_eq!(transform.run(5).unwrap(), 40);
    }

    #[test]
    fn test_type_changing_stage() {
        let transform = Transform::from_fn(|x: i32| Ok(x))
            .then(DoubleNumber)
            .then(IntToString);

        assert_eq!(transform.run(5).unwrap(), "10");
    }

    #[test]
    fn test_error_propagation() {
        let transform = Transform::from_fn(|x: i32| Ok(x))
            .then(DoubleNumber)
            .then(FailingStage)
            .then(AddTen);

        let result = transform.run(5);
        assert!(result.is_err());
        assert_eq!(
            result.unwrap_err(),
            TransformError::Error("intentional failure".to_string())
        );
    }

    #[test]
    fn test_transform_composition() {
        // Build sub-transforms
        let double_and_add = Transform::from_fn(|x: i32| Ok(x))
            .then(DoubleNumber)
            .then(AddTen);

        let to_string = Transform::from_fn(|x: i32| Ok(x)).then(IntToString);

        // Each sub-transform works on its own...
        assert_eq!(double_and_add.run(5).unwrap(), 20);
        assert_eq!(to_string.run(5).unwrap(), "5");

        // ...and, because `Transform` implements `Runnable`, an owned transform
        // can be used directly as the next stage of another one.
        let combined = double_and_add.then(to_string);
        assert_eq!(combined.run(5).unwrap(), "20");
    }

    #[test]
    fn test_then_transform_with_static_reference() {
        let to_string = leaked_int_to_string();

        let pipeline = Transform::from_fn(|x: i32| Ok(x))
            .then(DoubleNumber)
            .then_transform(to_string);
        assert_eq!(pipeline.run(5).unwrap(), "10");

        // The referenced transform is only borrowed, so it can be reused.
        let other = Transform::from_fn(|x: i32| Ok(x))
            .then(AddTen)
            .then_transform(to_string);
        assert_eq!(other.run(5).unwrap(), "15");
    }

    #[test]
    fn test_then_transform_propagates_upstream_error() {
        let pipeline = Transform::from_fn(|x: i32| Ok(x))
            .then(FailingStage)
            .then_transform(leaked_int_to_string());

        assert_eq!(
            pipeline.run(5).unwrap_err(),
            TransformError::Error("intentional failure".to_string())
        );
    }

    #[test]
    fn test_transform_as_runnable() {
        // Generic code that only knows about `Runnable` accepts a `Transform`.
        fn run_stage<S: Runnable<i32, i32>>(stage: &S, value: i32) -> Result<i32, TransformError> {
            stage.run(value)
        }

        let transform = Transform::from_fn(|x: i32| Ok(x)).then(AddTen);
        assert_eq!(run_stage(&transform, 1).unwrap(), 11);
    }

    #[test]
    fn test_error_display() {
        let err = TransformError::Error("test error".to_string());
        assert_eq!(format!("{err}"), "test error");

        let stage_err = TransformError::StageFailed {
            stage: "tokenization".to_string(),
            message: "invalid token".to_string(),
        };
        assert_eq!(
            format!("{stage_err}"),
            "Stage 'tokenization' failed: invalid token"
        );
    }

    #[test]
    fn test_error_conversion() {
        let err1: TransformError = "string error".into();
        assert_eq!(err1, TransformError::Error("string error".to_string()));

        let err2: TransformError = "owned string".to_string().into();
        assert_eq!(err2, TransformError::Error("owned string".to_string()));
    }
}