ggen-core 26.6.25

Core graph-aware code generation engine
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
//! Template preprocessor pipeline for deterministic text transformations
//!
//! This module provides a preprocessing pipeline that applies deterministic text
//! transformations to templates before rendering. The preprocessor handles freeze
//! blocks, includes, and macros in a controlled order to ensure reproducible output.
//!
//! ## Pipeline Order
//!
//! The preprocessing pipeline executes in the following order:
//! 1. **Preprocessor**: Freeze, includes, macros
//! 2. **Render Frontmatter**: Tera rendering on YAML frontmatter
//! 3. **Graph Operations**: RDF loading and SPARQL query execution
//! 4. **Body Render**: Final Tera rendering of template body
//!
//! ## Preprocessing Stages
//!
//! - **Freeze**: Replace `{% startfreeze %}` … `{% endfreeze %}` blocks with cached slot contents
//!   - Preserves manual edits in generated files
//!   - Supports checksum-based validation
//! - **Includes**: Process `{% include %}` directives
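//!   - Insert the contents of each included file in place of its directive (a single pass; directives inside included files are not expanded by this stage)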
//!   - Support for relative and absolute paths
//! - **Macros**: Expand template macros
//!   - Macro definition and invocation
//!   - Parameter passing and scoping
//!
//! ## Error Model
//!
//! - Each stage returns contextual errors with:
//!   - Stage name for identification
//!   - Byte span for error location
//!   - Code snippet for context
//! - No partial silent edits - all transformations are explicit
//!
//! ## Examples
//!
//! ### Using the Preprocessor
//!
//! ```rust,no_run
//! use crate::preprocessor::{Preprocessor, PrepCtx};
//! use std::path::Path;
//!
//! # fn main() -> crate::utils::error::Result<()> {
//! let preprocessor = Preprocessor::with_default_stages();
//! let ctx = PrepCtx {
//!     template_path: Path::new("template.tmpl"),
//!     out_dir: Path::new("output"),
//!     vars_json: &serde_json::json!({}),
//! };
//!
//! let input = r#"
//! {% startfreeze id="header" %}
//! // Generated header
//! {% endfreeze %}
//! "#.to_string();
//!
//! let output = preprocessor.run(input, &ctx)?;
//! # Ok(())
//! # }
//! ```

use crate::utils::error::{Error, Result};
use serde_json;
use std::fs;
use std::path::{Path, PathBuf};

/// Preprocessor context for stage execution
///
/// Bundles the borrowed, read-only inputs handed to every [`Stage`] while the
/// pipeline runs. The context only holds references, so it is cheap to copy.
#[derive(Debug, Clone, Copy)]
pub struct PrepCtx<'a> {
    /// Path to the template file being processed.
    ///
    /// Its parent directory is the first place include paths are looked up,
    /// and the path feeds into the default id generated for freeze blocks
    /// that carry no explicit `id`.
    pub template_path: &'a Path,
    /// Output directory for generated files.
    ///
    /// Not read by the stages defined in this file.
    pub out_dir: &'a Path,
    /// JSON variables for template rendering (read-only).
    ///
    /// Not read by the stages defined in this file; callers with no
    /// variables can pass `serde_json::Value::Null`.
    pub vars_json: &'a serde_json::Value,
}

/// Trait for preprocessing stages
///
/// Defines the interface for pipeline stages that transform template content.
/// Implementors must be `Send + Sync`; [`Preprocessor`] stores them as boxed
/// trait objects and invokes them one after another.
pub trait Stage: Send + Sync {
    /// Returns the name of this preprocessing stage.
    ///
    /// [`Preprocessor::run`] embeds this name in the error it reports when
    /// the stage fails.
    fn name(&self) -> &'static str;
    /// Runs the stage on the input string with the given context.
    ///
    /// Returns the transformed text on success. The input is borrowed, so a
    /// stage always produces a new `String` rather than editing in place.
    fn run(&self, input: &str, ctx: &PrepCtx) -> Result<String>;
}

/// Freeze policy for handling cached content
///
/// Determines when to use cached content vs regenerating from templates.
/// The enum carries no data, so it is `Copy` and can be compared directly.
///
/// # Examples
///
/// ```rust
/// use crate::preprocessor::FreezePolicy;
///
/// # fn main() {
/// let policy = FreezePolicy::Checksum;
/// let same = policy; // `FreezePolicy` is `Copy`, `policy` stays usable
/// assert_eq!(policy, same);
/// assert_ne!(policy, FreezePolicy::Never);
/// # }
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FreezePolicy {
    /// Always use cached content if available
    Always,
    /// Use cached content only if checksum matches
    Checksum,
    /// Never use cached content (always regenerate)
    Never,
}

/// Freeze stage for processing `{% startfreeze %}` … `{% endfreeze %}` blocks
///
/// Replaces each freeze block in a template with slot content, reading from
/// and writing to files under `slots_dir` according to `policy`.
#[derive(Debug, Clone)]
pub struct FreezeStage {
    /// Directory where cached slot content is stored.
    ///
    /// Each slot is kept as a `<id>.slot` file with a sibling `<id>.checksum`
    /// file; the directory is created on demand when a slot is saved.
    pub slots_dir: PathBuf,
    /// Policy determining when to use cached content.
    pub policy: FreezePolicy,
}

/// [`Stage`] implementation that rewrites freeze blocks.
impl Stage for FreezeStage {
    /// Stage identifier; the pipeline includes it in failure messages.
    fn name(&self) -> &'static str {
        "freeze"
    }

    /// Delegates to `process_freeze_blocks`, passing this stage so that its
    /// `slots_dir` and `policy` are applied to every block found in `input`.
    fn run(&self, input: &str, ctx: &PrepCtx) -> Result<String> {
        process_freeze_blocks(input, ctx, self)
    }
}

/// Include stage for processing `{% include %}` directives
///
/// Replaces each `{% include "path" %}` directive in a template with the
/// contents of the referenced file.
#[derive(Debug, Clone)]
pub struct IncludeStage {
    /// Directories to search for included template files.
    ///
    /// These are consulted in order, after the directory that contains the
    /// template currently being processed.
    pub template_dirs: Vec<PathBuf>,
}

/// [`Stage`] implementation that expands include directives.
impl Stage for IncludeStage {
    /// Stage identifier; the pipeline includes it in failure messages.
    fn name(&self) -> &'static str {
        "include"
    }

    /// Delegates to `process_includes`, passing this stage so that its
    /// `template_dirs` are used when resolving include paths.
    fn run(&self, input: &str, ctx: &PrepCtx) -> Result<String> {
        process_includes(input, ctx, self)
    }
}

/// Main preprocessor orchestrating multiple stages
///
/// Coordinates a pipeline of preprocessing stages that transform template
/// content deterministically before rendering. Stages are added with
/// [`Preprocessor::with`] and executed by [`Preprocessor::run`] in the order
/// they were added, each one receiving the previous stage's output.
pub struct Preprocessor {
    /// Ordered list of preprocessing stages.
    ///
    /// Boxed trait objects so that different stage types can share one pipeline.
    stages: Vec<Box<dyn Stage>>,
}

impl Default for Preprocessor {
    /// Equivalent to [`Preprocessor::new`]: a pipeline with no stages.
    fn default() -> Self {
        Self::new()
    }
}

impl Preprocessor {
    /// Create a new preprocessor with no stages
    pub fn new() -> Self {
        Self { stages: Vec::new() }
    }

    /// Add a stage to the pipeline
    pub fn with<S: Stage + 'static>(mut self, stage: S) -> Self {
        self.stages.push(Box::new(stage));
        self
    }

    /// Run all stages in order on the input
    pub fn run(&self, mut input: String, ctx: &PrepCtx) -> Result<String> {
        for stage in &self.stages {
            input = stage.run(&input, ctx).map_err(|e| {
                Error::with_source(&format!("Stage '{}' failed", stage.name()), Box::new(e))
            })?;
        }
        Ok(input)
    }

    /// Create a preprocessor with common stages
    pub fn with_default_stages() -> Self {
        Self::new().with(IncludeStage {
            template_dirs: vec![PathBuf::from("templates")],
        })
    }

    /// Create a preprocessor with freeze support
    pub fn with_freeze(slots_dir: PathBuf, policy: FreezePolicy) -> Self {
        Self::new()
            .with(IncludeStage {
                template_dirs: vec![PathBuf::from("templates")],
            })
            .with(FreezeStage { slots_dir, policy })
    }
}

/// Process {% freeze %} blocks in template content
fn process_freeze_blocks(input: &str, ctx: &PrepCtx, stage: &FreezeStage) -> Result<String> {
    let mut result = String::new();
    let mut pos = 0;

    // Find all freeze blocks
    while let Some(start) = input[pos..].find("{% startfreeze") {
        let absolute_start = pos + start;

        // Find the end of the start tag
        let end_start = if let Some(end) = input[absolute_start..].find("%}") {
            absolute_start + end + 2
        } else {
            return Err(Error::new(&format!(
                "Unclosed startfreeze tag at position {}",
                absolute_start
            )));
        };

        // Find the matching endfreeze tag
        let endfreeze_start = if let Some(end) = input[end_start..].find("{% endfreeze %}") {
            end_start + end
        } else {
            return Err(Error::new(&format!(
                "Missing endfreeze tag for startfreeze at position {}",
                absolute_start
            )));
        };

        let endfreeze_end = endfreeze_start + "{% endfreeze %}".len();

        // Extract block content and attributes
        let block_content = &input[end_start..endfreeze_start];
        let start_tag = &input[absolute_start..end_start];

        // Parse attributes from start tag
        let (id, checksum) = parse_freeze_attributes(start_tag)?;

        // Generate default id/checksum if missing
        let final_id = id.unwrap_or_else(|| generate_default_id(ctx.template_path, absolute_start));
        let final_checksum = checksum.unwrap_or_else(|| generate_checksum(block_content));

        // Process based on policy
        let processed_content = match stage.policy {
            FreezePolicy::Always => {
                load_or_generate_slot(&stage.slots_dir, &final_id, &final_checksum, block_content)?
            }
            FreezePolicy::Checksum => {
                load_or_generate_slot(&stage.slots_dir, &final_id, &final_checksum, block_content)?
            }
            FreezePolicy::Never => {
                generate_and_save_slot(&stage.slots_dir, &final_id, &final_checksum, block_content)?
            }
        };

        // Add content before this block
        result.push_str(&input[pos..absolute_start]);
        // Add processed content
        result.push_str(&processed_content);

        // Move position past this block
        pos = endfreeze_end;
    }

    // Add remaining content
    result.push_str(&input[pos..]);
    Ok(result)
}

/// Expand `{% include %}` directives in template content
///
/// Walks `input` from left to right; each directive (from `{% include` up to
/// the next `%}`) is replaced by the full text of the file it names. The file
/// is located via `resolve_include_path`, which checks the template's own
/// directory before `stage.template_dirs`. Inserted text is not scanned again,
/// so directives inside an included file are left as-is.
///
/// # Errors
///
/// Fails if a directive has no closing `%}`, if its path cannot be parsed or
/// resolved, or if the resolved file cannot be read.
fn process_includes(input: &str, ctx: &PrepCtx, stage: &IncludeStage) -> Result<String> {
    let mut expanded = String::new();
    let mut cursor = 0;

    loop {
        // Locate the next directive, or stop when none are left.
        let tag_begin = match input[cursor..].find("{% include") {
            Some(offset) => cursor + offset,
            None => break,
        };

        // The directive ends just past its closing `%}`.
        let tag_end = match input[tag_begin..].find("%}") {
            Some(offset) => tag_begin + offset + 2,
            None => {
                return Err(Error::new(&format!(
                    "Unclosed include tag at position {}",
                    tag_begin
                )))
            }
        };

        // Tag text -> requested path -> file on disk -> file contents.
        let requested = parse_include_path(&input[tag_begin..tag_end])?;
        let file = resolve_include_path(&requested, ctx, &stage.template_dirs)?;
        let body = fs::read_to_string(&file).map_err(|e| {
            Error::new(&format!(
                "Failed to read include file '{}': {}",
                file.display(),
                e
            ))
        })?;

        // Emit the text preceding the directive, then the included file.
        expanded.push_str(&input[cursor..tag_begin]);
        expanded.push_str(&body);

        cursor = tag_end;
    }

    // Whatever follows the last directive is copied through unchanged.
    expanded.push_str(&input[cursor..]);
    Ok(expanded)
}

/// Parse attributes from a `{% startfreeze %}` tag
///
/// Looks for `id="..."` and `checksum="..."` inside `tag` and returns them as
/// `(id, checksum)`. An attribute that is absent, or whose value has no
/// closing quote, is reported as `None`.
///
/// An attribute name only matches when it is not the tail of a longer name:
/// `uuid="x"` or `slot_id="x"` does not count as `id`.
///
/// # Errors
///
/// Currently never fails; the `Result` return type is kept for callers.
fn parse_freeze_attributes(tag: &str) -> Result<(Option<String>, Option<String>)> {
    let id = find_quoted_attribute(tag, "id");
    let checksum = find_quoted_attribute(tag, "checksum");
    Ok((id, checksum))
}

/// Return the value of the first `name="value"` attribute in `tag`, if any.
fn find_quoted_attribute(tag: &str, name: &str) -> Option<String> {
    let needle = format!("{}=\"", name);
    let mut search_from = 0;

    while let Some(offset) = tag[search_from..].find(&needle) {
        let name_start = search_from + offset;
        let value_start = name_start + needle.len();

        // Reject matches that are merely the suffix of another identifier.
        let at_boundary = tag[..name_start]
            .chars()
            .next_back()
            .map_or(true, |prev| !(prev.is_alphanumeric() || prev == '_' || prev == '-'));

        if at_boundary {
            return tag[value_start..]
                .find('"')
                .map(|len| tag[value_start..value_start + len].to_string());
        }

        // Keep looking after the rejected match.
        search_from = value_start;
    }

    None
}

/// Parse include path from {% include %} tag
fn parse_include_path(tag: &str) -> Result<String> {
    // Extract path from {% include "path" %} or {% include 'path' %}
    let path_start = if let Some(start) = tag.find('"') {
        start + 1
    } else if let Some(start) = tag.find('\'') {
        start + 1
    } else {
        return Err(Error::new(&format!("Invalid include tag format: {}", tag)));
    };

    let path_end = if let Some(end) = tag[path_start..].find('"') {
        path_start + end
    } else if let Some(end) = tag[path_start..].find('\'') {
        path_start + end
    } else {
        return Err(Error::new(&format!(
            "Unclosed include path in tag: {}",
            tag
        )));
    };

    Ok(tag[path_start..path_end].to_string())
}

/// Resolve include path relative to template directories
fn resolve_include_path(
    include_path: &str, ctx: &PrepCtx, template_dirs: &[PathBuf],
) -> Result<PathBuf> {
    // Try relative to template file first
    if let Some(template_dir) = ctx.template_path.parent() {
        let relative_path = template_dir.join(include_path);
        if relative_path.exists() {
            return Ok(relative_path);
        }
    }

    // Try template directories
    for template_dir in template_dirs {
        let full_path = template_dir.join(include_path);
        if full_path.exists() {
            return Ok(full_path);
        }
    }

    Err(Error::new(&format!(
        "Include file not found: {}",
        include_path
    )))
}

/// Derive a slot id for a freeze block that has no explicit `id`
///
/// Hashes the template path followed by the block's byte position and
/// formats the 64-bit result as `slot_<lowercase hex>`.
///
/// NOTE(review): the std docs do not guarantee `DefaultHasher`'s algorithm
/// across Rust releases, so ids persisted as slot file names may change after
/// a toolchain upgrade — confirm whether that matters for existing caches.
fn generate_default_id(template_path: &Path, position: usize) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    // Order matters: path first, then position.
    Hash::hash(template_path, &mut state);
    Hash::hash(&position, &mut state);

    let digest = state.finish();
    format!("slot_{:x}", digest)
}

/// Compute the checksum string for a block of content
///
/// Hashes `content` and returns the 64-bit result as lowercase hex without a
/// prefix or padding.
///
/// NOTE(review): the std docs do not guarantee `DefaultHasher`'s algorithm
/// across Rust releases, so stored checksums may stop matching after a
/// toolchain upgrade — confirm whether that matters for existing caches.
fn generate_checksum(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);

    let digest = state.finish();
    format!("{:x}", digest)
}

/// Load cached slot or generate new content
fn load_or_generate_slot(
    slots_dir: &Path, id: &str, checksum: &str, content: &str,
) -> Result<String> {
    let slot_path = slots_dir.join(format!("{}.slot", id));
    let checksum_path = slots_dir.join(format!("{}.checksum", id));

    // Check if cached slot exists and checksum matches
    if slot_path.exists() && checksum_path.exists() {
        if let Ok(cached_checksum) = fs::read_to_string(&checksum_path) {
            if cached_checksum.trim() == checksum {
                return fs::read_to_string(&slot_path)
                    .map_err(|e| Error::with_source("Failed to read cached slot", Box::new(e)));
            }
        }
    }

    // Generate and save new slot
    generate_and_save_slot(slots_dir, id, checksum, content)
}

/// Generate and save slot content
fn generate_and_save_slot(
    slots_dir: &Path, id: &str, checksum: &str, content: &str,
) -> Result<String> {
    // Ensure slots directory exists
    fs::create_dir_all(slots_dir)
        .map_err(|e| Error::with_source("Failed to create slots directory", Box::new(e)))?;

    let slot_path = slots_dir.join(format!("{}.slot", id));
    let checksum_path = slots_dir.join(format!("{}.checksum", id));

    // Save content and checksum
    fs::write(&slot_path, content)
        .map_err(|e| Error::with_source("Failed to write slot file", Box::new(e)))?;
    fs::write(&checksum_path, checksum)
        .map_err(|e| Error::with_source("Failed to write checksum file", Box::new(e)))?;

    Ok(content.to_string())
}

// Unit tests for the freeze stage, the include stage, and a two-stage pipeline.
// Each test works inside its own temporary directory.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a context with placeholder paths and `Null` variables.
    ///
    /// `test.tmpl` has an empty parent directory, so include lookups relative
    /// to the template fall back to the current working directory.
    fn create_test_ctx() -> PrepCtx<'static> {
        PrepCtx {
            template_path: Path::new("test.tmpl"),
            out_dir: Path::new("output"),
            vars_json: &serde_json::Value::Null,
        }
    }

    /// A freeze block with an explicit id is replaced by its body and both
    /// tags are removed from the output.
    #[test]
    fn test_freeze_stage_basic() -> Result<()> {
        let temp_dir = TempDir::new()?;
        let stage = FreezeStage {
            slots_dir: temp_dir.path().to_path_buf(),
            policy: FreezePolicy::Always,
        };

        let input = r#"Hello
{% startfreeze id="test" %}
World
{% endfreeze %}
!"#;

        let ctx = create_test_ctx();
        let result = stage.run(input, &ctx)?;

        assert!(result.contains("World"));
        assert!(!result.contains("startfreeze"));
        assert!(!result.contains("endfreeze"));

        Ok(())
    }

    /// An include directive is replaced by the contents of a file found in
    /// one of the stage's template directories.
    #[test]
    fn test_include_stage_basic() -> Result<()> {
        let temp_dir = TempDir::new()?;
        let include_file = temp_dir.path().join("included.tmpl");
        fs::write(&include_file, "Included content")?;

        let stage = IncludeStage {
            template_dirs: vec![temp_dir.path().to_path_buf()],
        };

        let input = r#"Hello
{% include "included.tmpl" %}
!"#;

        let ctx = create_test_ctx();
        let result = stage.run(input, &ctx)?;

        assert!(result.contains("Included content"));
        // Case-sensitive check: "Included" does not contain lowercase "include".
        assert!(!result.contains("include"));

        Ok(())
    }

    /// Include and freeze stages chained in one pipeline both take effect.
    #[test]
    fn test_preprocessor_pipeline() -> Result<()> {
        let temp_dir = TempDir::new()?;
        let include_file = temp_dir.path().join("included.tmpl");
        fs::write(&include_file, "Included")?;

        // Includes run first, then freeze; slots live in a not-yet-existing subdirectory.
        let preprocessor = Preprocessor::new()
            .with(IncludeStage {
                template_dirs: vec![temp_dir.path().to_path_buf()],
            })
            .with(FreezeStage {
                slots_dir: temp_dir.path().join("slots"),
                policy: FreezePolicy::Always,
            });

        let input = r#"Hello
{% include "included.tmpl" %}
{% startfreeze id="test" %}
Frozen
{% endfreeze %}
!"#;

        let ctx = create_test_ctx();
        let result = preprocessor.run(input.to_string(), &ctx)?;

        assert!(result.contains("Included"));
        assert!(result.contains("Frozen"));
        // Case-sensitive check: "Included" does not contain lowercase "include".
        assert!(!result.contains("include"));
        assert!(!result.contains("startfreeze"));

        Ok(())
    }
}