ggen-core 26.7.2

Core graph-aware code generation engine
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
//! μ₄: Code Canonicalization
//!
//! This module provides canonicalization (formatting) for generated code.
//! It ensures deterministic output through:
//! - Import sorting (alphabetically)
//! - Whitespace normalization
//! - Generated header injection
//! - Language-specific formatting patterns
//!
//! ## A2A-RS Integration
//!
//! For A2A-RS generated code, this module:
//! 1. Sorts imports alphabetically (std → external → internal)
//! 2. Normalizes line endings to LF
//! 3. Removes trailing whitespace
//! 4. Ensures final newline
//! 5. Injects generated code header with warning

use crate::utils::error::{Error, Result};
use regex::Regex;
use std::collections::BTreeSet;
use std::path::Path;

/// Text-formatting steps for canonicalization (≤3 bools)
///
/// Each flag switches one text-level pass on or off. Because the struct
/// derives `Default`, every flag starts out `false`, so all passes are opt-in.
// NOTE(review): "≤3 bools" presumably documents a self-imposed limit on the
// number of boolean fields per struct (e.g. to satisfy a lint) — confirm.
#[derive(Debug, Clone, Default)]
pub struct FormattingFlags {
    /// Whether to sort imports
    ///
    /// Only consulted by `canonicalize` (the Rust path); the TOML, Turtle and
    /// generic canonicalizers do not read it.
    pub sort_imports: bool,
    /// Whether to normalize line endings
    ///
    /// When set, CRLF and lone CR are rewritten to LF.
    pub normalize_line_endings: bool,
    /// Whether to remove trailing whitespace
    ///
    /// When set, whitespace at the end of every line is stripped.
    pub trim_trailing_whitespace: bool,
}

/// Boolean feature-flags for canonicalization steps
///
/// Groups the text-formatting switches with the header-injection switch.
/// All flags default to `false` through the derived `Default`.
#[derive(Debug, Clone, Default)]
pub struct CanonicalizeFlags {
    /// Text formatting options
    pub formatting: FormattingFlags,
    /// Whether to inject generated header
    ///
    /// Only consulted by `canonicalize` (the Rust path).
    pub inject_header: bool,
}

/// Canonicalization options for generated code
///
/// The derived `Default` leaves every step disabled and no custom header set;
/// use the `with_*` builder methods to switch individual steps on.
#[derive(Debug, Clone, Default)]
pub struct CanonicalizeOptions {
    /// Feature flags controlling which canonicalization steps run
    pub flags: CanonicalizeFlags,

    /// Custom header content (if None, uses default)
    ///
    /// Only used when `flags.inject_header` is set. The default is the
    /// built-in "DO NOT EDIT" banner, which ends with a generation timestamp.
    pub custom_header: Option<String>,
}

impl CanonicalizeOptions {
    /// Build an options value with every step disabled and no custom header.
    ///
    /// Equivalent to `CanonicalizeOptions::default()`.
    pub fn new() -> Self {
        Default::default()
    }

    /// Turn import sorting on (`true`) or off (`false`).
    pub fn with_sort_imports(self, enabled: bool) -> Self {
        let mut options = self;
        options.flags.formatting.sort_imports = enabled;
        options
    }

    /// Turn line-ending normalization on (`true`) or off (`false`).
    pub fn with_normalize_line_endings(self, enabled: bool) -> Self {
        let mut options = self;
        options.flags.formatting.normalize_line_endings = enabled;
        options
    }

    /// Turn generated-header injection on (`true`) or off (`false`).
    pub fn with_inject_header(self, enabled: bool) -> Self {
        let mut options = self;
        options.flags.inject_header = enabled;
        options
    }

    /// Use `header` instead of the built-in banner when a header is injected.
    pub fn with_custom_header(self, header: String) -> Self {
        Self {
            custom_header: Some(header),
            ..self
        }
    }

    /// Turn trailing-whitespace removal on (`true`) or off (`false`).
    pub fn with_trim_trailing_whitespace(self, enabled: bool) -> Self {
        let mut options = self;
        options.flags.formatting.trim_trailing_whitespace = enabled;
        options
    }
}

/// A parsed Rust import statement
///
/// Produced by `RustImport::parse` from a single `use ...;` line.
/// The derived `Ord`/`PartialOrd` compare fields in declaration order, i.e.
/// `full_line` first.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct RustImport {
    /// The full import line
    ///
    /// Stored with surrounding whitespace trimmed, so original indentation is
    /// not retained.
    full_line: String,
    /// The import path (e.g., "std::collections::HashMap")
    ///
    /// This is the line with the leading `use ` and trailing `;` removed.
    path: String,
    /// Import type: use, mod, extern crate
    ///
    /// `RustImport::parse` currently only ever sets this to "use".
    import_type: String,
    /// Import group: std, external, or internal
    group: ImportGroup,
}

/// Import group for sorting
///
/// The derived `Ord` follows declaration order, so
/// `Std < External < Internal`; `RustImport::sort_key` relies on that order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum ImportGroup {
    /// std library imports
    ///
    /// Paths starting with `std::`, `core::` or `alloc::`.
    Std,
    /// External crate imports
    ///
    /// Any other path that contains `::` and does not start with `crate::`.
    External,
    /// Internal/crate imports
    ///
    /// Paths starting with `crate::`, plus single-segment paths without `::`.
    Internal,
}

impl RustImport {
    /// Parse a single `use` line into a [`RustImport`].
    ///
    /// Returns `None` when the trimmed line does not start with `use `, or
    /// when the imported path is relative (`self::` / `super::`), so callers
    /// can leave such lines untouched.
    fn parse(line: &str) -> Option<Self> {
        /// Path roots that count as the standard library.
        const STD_ROOTS: [&str; 3] = ["std::", "core::", "alloc::"];

        let statement = line.trim();

        // Drop the `use ` keyword and any trailing `;` to get the bare path.
        let path = statement.strip_prefix("use ")?.trim_end_matches(';').trim();

        // Relative imports are not classified or reordered.
        if path.starts_with("self::") || path.starts_with("super::") {
            return None;
        }

        let group = match path {
            p if STD_ROOTS.iter().any(|root| p.starts_with(*root)) => ImportGroup::Std,
            // `crate::` paths live in the current crate.
            p if p.starts_with("crate::") => ImportGroup::Internal,
            // Any other multi-segment path is treated as an external crate.
            p if p.contains("::") => ImportGroup::External,
            // Single-segment paths fall back to the internal group.
            _ => ImportGroup::Internal,
        };

        Some(Self {
            full_line: statement.to_owned(),
            path: path.to_owned(),
            import_type: String::from("use"),
            group,
        })
    }

    /// Key used to order imports: group first, then the import path.
    fn sort_key(&self) -> (ImportGroup, String) {
        let Self { group, path, .. } = self;
        (*group, path.clone())
    }
}

/// Canonicalize generated Rust code
///
/// # Arguments
/// * `content` - The generated code to canonicalize
/// * `options` - Canonicalization options
///
/// # Returns
/// Canonicalized code string
pub fn canonicalize(content: &str, options: &CanonicalizeOptions) -> Result<String> {
    let mut result = content.to_string();

    // Step 1: Normalize line endings
    if options.flags.formatting.normalize_line_endings {
        result = normalize_line_endings(&result);
    }

    // Step 2: Trim trailing whitespace
    if options.flags.formatting.trim_trailing_whitespace {
        result = trim_trailing_whitespace(&result);
    }

    // Step 3: Sort imports (before header injection)
    if options.flags.formatting.sort_imports {
        result = sort_rust_imports(&result)?;
    }

    // Step 4: Inject header (last, so it's at the very top)
    if options.flags.inject_header {
        let header = options
            .custom_header
            .clone()
            .unwrap_or_else(default_generated_header);
        result = inject_header(&result, &header);
    }

    // Ensure final newline
    if !result.ends_with('\n') {
        result.push('\n');
    }

    Ok(result)
}

/// Rewrite every CRLF pair and every lone CR in `content` as a single LF.
fn normalize_line_endings(content: &str) -> String {
    let mut normalized = String::with_capacity(content.len());
    let mut chars = content.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch != '\r' {
            normalized.push(ch);
            continue;
        }
        // A CR directly followed by LF is one line break, not two.
        if chars.peek() == Some(&'\n') {
            chars.next();
        }
        normalized.push('\n');
    }

    normalized
}

/// Strip trailing whitespace from every line of `content`.
///
/// Lines are re-joined with `\n`; a final newline is emitted only when the
/// input itself ended with one.
fn trim_trailing_whitespace(content: &str) -> String {
    let mut cleaned = String::with_capacity(content.len());

    for (index, line) in content.lines().enumerate() {
        if index > 0 {
            cleaned.push('\n');
        }
        cleaned.push_str(line.trim_end());
    }

    // `lines()` swallows the last line terminator, so put it back if present.
    if content.ends_with('\n') {
        cleaned.push('\n');
    }

    cleaned
}

/// Sort Rust imports alphabetically within groups (std → external → internal)
///
/// The input is scanned line by line. Each run of consecutive lines starting
/// with `use ` forms an import block; within a block the imports are
/// de-duplicated, split into std / external / internal groups, ordered by
/// their full line text, and written back with one blank line between
/// non-empty groups. `use` lines that `RustImport::parse` rejects (relative
/// `self::` / `super::` imports) are emitted unchanged ahead of the sorted
/// imports of their block.
///
/// A single blank line directly after a block is absorbed and replaced by the
/// separator written after the block. Any other following line — including
/// `//` comments and `///` doc comments — ends the block and is preserved.
///
/// # Errors
/// This implementation currently never fails; the `Result` return type is
/// kept for interface stability.
fn sort_rust_imports(content: &str) -> Result<String> {
    let lines: Vec<&str> = content.lines().collect();

    let mut result = String::with_capacity(content.len());
    let mut i = 0;

    while i < lines.len() {
        let line = lines[i];

        // Anything that is not the start of an import block is copied verbatim.
        if !line.trim().starts_with("use ") {
            result.push_str(line);
            result.push('\n');
            i += 1;
            continue;
        }

        // Collect all consecutive `use` lines of this block.
        let mut imports = Vec::new();
        while i < lines.len() {
            let trimmed = lines[i].trim();

            if trimmed.starts_with("use ") {
                match RustImport::parse(lines[i]) {
                    Some(import) => imports.push(import),
                    None => {
                        // Couldn't parse (relative import), keep as-is.
                        result.push_str(lines[i]);
                        result.push('\n');
                    }
                }
                i += 1;
                continue;
            }

            // A blank line ends the block and is absorbed; the separator
            // written after the block takes its place. Comments and code end
            // the block WITHOUT being consumed, so the outer loop emits them.
            if trimmed.is_empty() {
                i += 1;
            }
            break;
        }

        // Order by (group, path). The final order inside each group comes
        // from the BTreeSets below, which are keyed on the full line text and
        // also remove duplicate imports.
        imports.sort_by_key(|imp| imp.sort_key());

        let mut std_imports: BTreeSet<String> = BTreeSet::new();
        let mut external_imports: BTreeSet<String> = BTreeSet::new();
        let mut internal_imports: BTreeSet<String> = BTreeSet::new();

        for imp in imports {
            let bucket = match imp.group {
                ImportGroup::Std => &mut std_imports,
                ImportGroup::External => &mut external_imports,
                ImportGroup::Internal => &mut internal_imports,
            };
            bucket.insert(imp.full_line);
        }

        // Write the groups in order, with exactly one blank line between any
        // two non-empty groups (also when the external group is empty).
        let mut wrote_group = false;
        for group in [&std_imports, &external_imports, &internal_imports].iter() {
            if group.is_empty() {
                continue;
            }
            if wrote_group {
                result.push('\n');
            }
            for imp in group.iter() {
                result.push_str(imp);
                result.push('\n');
            }
            wrote_group = true;
        }

        // Add blank line after import block if next line is not blank.
        if i < lines.len() && !lines[i].trim().is_empty() {
            result.push('\n');
        }
    }

    Ok(result)
}

/// Inject generated header at the top of the file
///
/// The header is written followed by a newline, then the original content
/// byte-for-byte. If the content starts with a shebang line, the header is
/// placed directly after that line instead.
///
/// The content is returned unchanged when:
/// * one of its first five lines contains `DO NOT EDIT` or `Generated by`, or
/// * it already starts (after an optional shebang) with `header` followed by
///   a newline — this keeps repeated injection of a custom header idempotent.
///
/// # Arguments
/// * `content` - The file content to prepend the header to
/// * `header` - The header text; a trailing newline is added automatically
fn inject_header(content: &str, header: &str) -> String {
    // Check if a generated-code marker already exists near the top.
    let has_marker = content
        .lines()
        .take(5)
        .any(|line| line.contains("DO NOT EDIT") || line.contains("Generated by"));

    if has_marker {
        return content.to_string();
    }

    let (shebang, body) = split_shebang(content);

    // The same header was injected before: nothing to do.
    if matches!(body.strip_prefix(header), Some(tail) if tail.starts_with('\n')) {
        return content.to_string();
    }

    match shebang {
        Some(line) => format!("{}\n{}\n{}", line, header, body),
        None => format!("{}\n{}", header, body),
    }
}

/// Split a leading shebang line off `content`.
///
/// Returns the shebang line (without its line terminator) and the untouched
/// remainder. `#!` followed by `[` (ignoring whitespace) is a Rust inner
/// attribute such as `#![allow(...)]`, not a shebang, and is left in place.
fn split_shebang(content: &str) -> (Option<&str>, &str) {
    let after_marker = match content.strip_prefix("#!") {
        Some(rest) => rest,
        None => return (None, content),
    };

    if after_marker.trim_start().starts_with('[') {
        return (None, content);
    }

    // Split at the first newline only, so the rest keeps its exact bytes
    // (including the trailing newline).
    let (first, rest) = content.split_once('\n').unwrap_or((content, ""));
    (Some(first.strip_suffix('\r').unwrap_or(first)), rest)
}

/// Default generated code header
///
/// Returns the fixed "DO NOT EDIT" banner followed by the current UTC time in
/// RFC 3339 format. The result has no trailing newline.
///
/// Because the timestamp is taken at call time, two calls made at different
/// moments return different strings.
fn default_generated_header() -> String {
    const BANNER: &str = concat!(
        "// DO NOT EDIT\n",
        "// Generated by ggen (https://github.com/seanchatmangpt/ggen)\n",
        "//\n",
        "// Changes to this file will be overwritten when the code is regenerated.\n",
        "// Please modify the source ontology (.ttl files) and templates instead.\n",
        "//\n",
        "// Generated at: ",
    );

    let timestamp = chrono::Utc::now().to_rfc3339();
    format!("{}{}", BANNER, timestamp)
}

/// Get the generated header with timestamp
///
/// Public accessor for the built-in banner produced by
/// `default_generated_header`: the "DO NOT EDIT" comment block ending in a
/// `Generated at:` line stamped with the current UTC time. The timestamp is
/// computed on every call.
pub fn get_generated_header() -> String {
    default_generated_header()
}

/// Canonicalize a file by path
///
/// Picks a canonicalizer from the file extension (matched case-sensitively):
/// `rs`, `toml`, `json`, and `ttl` / `turtle` have dedicated handlers; any
/// other extension, a missing extension, or one that is not valid UTF-8 uses
/// the generic handler.
///
/// # Arguments
/// * `file_path` - Path to the file (used for format detection)
/// * `content` - The file content
/// * `options` - Canonicalization options
///
/// # Returns
/// Canonicalized content
///
/// # Errors
/// Propagates the error of the selected canonicalizer (for example invalid
/// JSON input).
pub fn canonicalize_file(
    file_path: &Path, content: &str, options: &CanonicalizeOptions,
) -> Result<String> {
    let extension = file_path.extension().and_then(|ext| ext.to_str());

    match extension {
        Some("rs") => canonicalize_rust(content, options),
        Some("toml") => canonicalize_toml(content, options),
        Some("json") => canonicalize_json(content, options),
        Some("ttl") | Some("turtle") => canonicalize_ttl(content, options),
        _ => canonicalize_generic(content, options),
    }
}

/// Canonicalize Rust code
///
/// Forwards `content` and `options` to `canonicalize` without modification.
/// Used by `canonicalize_file` for `.rs` files and by the
/// `canonicalize_rust_quick` / `canonicalize_a2a` presets.
fn canonicalize_rust(content: &str, options: &CanonicalizeOptions) -> Result<String> {
    canonicalize(content, options)
}

/// Canonicalize TOML (basic: normalize line endings)
///
/// Applies the line-ending and trailing-whitespace passes when enabled and
/// guarantees a final newline. The import-sorting and header flags are not
/// consulted, and the TOML itself is not parsed.
fn canonicalize_toml(content: &str, options: &CanonicalizeOptions) -> Result<String> {
    let formatting = &options.flags.formatting;

    let unified = if formatting.normalize_line_endings {
        normalize_line_endings(content)
    } else {
        content.to_string()
    };

    let mut cleaned = if formatting.trim_trailing_whitespace {
        trim_trailing_whitespace(&unified)
    } else {
        unified
    };

    if !cleaned.ends_with('\n') {
        cleaned.push('\n');
    }

    Ok(cleaned)
}

/// Canonicalize JSON — always pretty-prints for deterministic output
fn canonicalize_json(content: &str, _options: &CanonicalizeOptions) -> Result<String> {
    // Always parse and pretty-print JSON for canonical, deterministic output
    let value: serde_json::Value =
        serde_json::from_str(content).map_err(|e| Error::new(&format!("Invalid JSON: {}", e)))?;

    let mut result = serde_json::to_string_pretty(&value)
        .map_err(|e| Error::new(&format!("JSON serialization error: {}", e)))?;

    if !result.ends_with('\n') {
        result.push('\n');
    }

    Ok(result)
}

/// Canonicalize Turtle/RDF (basic: normalize line endings)
fn canonicalize_ttl(content: &str, options: &CanonicalizeOptions) -> Result<String> {
    let mut result = content.to_string();

    if options.flags.formatting.normalize_line_endings {
        result = normalize_line_endings(&result);
    }

    if options.flags.formatting.trim_trailing_whitespace {
        result = trim_trailing_whitespace(&result);
    }

    // Remove consecutive blank lines
    let re = Regex::new(r"\n{3,}").map_err(|e| Error::new(&format!("Regex error: {}", e)))?;
    result = re.replace_all(&result, "\n\n").to_string();

    if !result.ends_with('\n') {
        result.push('\n');
    }

    Ok(result)
}

/// Generic canonicalization (line endings + whitespace only)
///
/// Fallback for file types without a dedicated handler: applies the
/// line-ending and trailing-whitespace passes when enabled and guarantees a
/// final newline. The import-sorting and header flags are not consulted.
fn canonicalize_generic(content: &str, options: &CanonicalizeOptions) -> Result<String> {
    let formatting = &options.flags.formatting;

    let unified = if formatting.normalize_line_endings {
        normalize_line_endings(content)
    } else {
        content.to_string()
    };

    let mut cleaned = if formatting.trim_trailing_whitespace {
        trim_trailing_whitespace(&unified)
    } else {
        unified
    };

    if !cleaned.ends_with('\n') {
        cleaned.push('\n');
    }

    Ok(cleaned)
}

/// Quick canonicalize with default options for Rust files
///
/// Enables import sorting, line-ending normalization and trailing-whitespace
/// trimming, but leaves header injection off.
///
/// # Errors
/// Propagates any error from the underlying Rust canonicalization.
pub fn canonicalize_rust_quick(content: &str) -> Result<String> {
    let mut options = CanonicalizeOptions::new();
    options.flags.formatting.sort_imports = true;
    options.flags.formatting.normalize_line_endings = true;
    options.flags.formatting.trim_trailing_whitespace = true;
    // Header injected separately if needed
    options.flags.inject_header = false;

    canonicalize_rust(content, &options)
}

/// Canonicalize with A2A-RS standard options
///
/// This is the recommended canonicalization for A2A-RS generated code.
/// It enables every pass: import sorting, line-ending normalization,
/// trailing-whitespace trimming and injection of the default generated
/// header.
///
/// # Errors
/// Propagates any error from the underlying Rust canonicalization.
pub fn canonicalize_a2a(content: &str) -> Result<String> {
    let mut options = CanonicalizeOptions::new();
    options.flags.formatting.sort_imports = true;
    options.flags.formatting.normalize_line_endings = true;
    options.flags.formatting.trim_trailing_whitespace = true;
    options.flags.inject_header = true;

    canonicalize_rust(content, &options)
}

// Unit tests for the canonicalization passes and the per-format entry points.
#[cfg(test)]
mod tests {
    use super::*;

    // CRLF and a lone trailing CR must both become LF.
    #[test]
    fn test_normalize_line_endings() {
        let input = "hello\r\nworld\r";
        let result = normalize_line_endings(input);
        assert_eq!(result, "hello\nworld\n");
    }

    // Trailing spaces are removed; leading indentation and the final newline stay.
    #[test]
    fn test_trim_trailing_whitespace() {
        let input = "hello   \nworld  \n  test  \n";
        let result = trim_trailing_whitespace(input);
        assert_eq!(result, "hello\nworld\n  test\n");
    }

    // The header is placed before the content, which is otherwise kept intact.
    #[test]
    fn test_inject_header() {
        let content = "fn main() {}\n";
        let header = "// Generated header\n";
        let result = inject_header(content, header);
        assert!(result.starts_with("// Generated header\n"));
        assert!(result.contains("fn main() {}\n"));
    }

    // Content that already carries a "DO NOT EDIT" marker is returned unchanged.
    #[test]
    fn test_inject_header_preserves_existing() {
        let content = "// DO NOT EDIT this file\nfn main() {}\n";
        let header = "// Generated header\n";
        let result = inject_header(content, header);
        // Should not duplicate
        assert_eq!(result, content);
    }

    // A std import parses into its bare path and the Std group.
    #[test]
    fn test_rust_import_parse() {
        let line = "use std::collections::HashMap;";
        let import = RustImport::parse(line);
        assert!(import.is_some());
        let imp = import.unwrap();
        assert_eq!(imp.path, "std::collections::HashMap");
        assert_eq!(imp.group, ImportGroup::Std);
    }

    // Mixed imports come out grouped std → external → internal, sorted within std.
    #[test]
    fn test_rust_import_sorting() {
        let input = r#"
use crate::module::inner::Struct;
use std::collections::HashMap;
use external_crate::Trait;
use std::vec::Vec;
use crate::module::Type;

fn main() {}
"#;

        let result = sort_rust_imports(input).unwrap();
        let lines: Vec<&str> = result.lines().collect();

        // Find import positions
        let std_idx = lines.iter().position(|l| l.contains("std::collections"));
        let vec_idx = lines.iter().position(|l| l.contains("std::vec"));
        let external_idx = lines.iter().position(|l| l.contains("external_crate"));
        let internal_idx = lines.iter().position(|l| l.contains("crate::module"));

        // std imports should come before external
        assert!(std_idx.unwrap() < external_idx.unwrap());
        // external should come before internal
        assert!(external_idx.unwrap() < internal_idx.unwrap());

        // std imports should be sorted
        assert!(std_idx.unwrap() < vec_idx.unwrap());
    }

    // End-to-end: `canonicalize` with sorting enabled reorders the imports.
    #[test]
    fn test_canonicalize_rust() {
        let input = r#"use std::b::B;
use std::a::A;
fn main() { println!("hello"); }"#;

        let options = CanonicalizeOptions::new()
            .with_sort_imports(true)
            .with_normalize_line_endings(true);

        let result = canonicalize(input, &options).unwrap();

        // std::a should come before std::b
        let a_pos = result.find("use std::a").unwrap();
        let b_pos = result.find("use std::b").unwrap();
        assert!(a_pos < b_pos);
    }

    // The A2A preset both injects the default header and sorts the imports.
    #[test]
    fn test_canonicalize_a2a() {
        let input = r#"use std::b::B;
use std::a::A;

fn main() {}
"#;

        let result = canonicalize_a2a(input).unwrap();

        // Should have header
        assert!(result.contains("DO NOT EDIT"));
        assert!(result.contains("Generated by ggen"));

        // std::a should come before std::b
        let a_pos = result.find("use std::a").unwrap();
        let b_pos = result.find("use std::b").unwrap();
        assert!(a_pos < b_pos);
    }

    // Compact JSON is re-emitted pretty-printed with a final newline,
    // even with all option flags left at their defaults.
    #[test]
    fn test_canonicalize_json() {
        let input = r#"{"a":1,"b":2}"#;
        let options = CanonicalizeOptions::new();
        let result = canonicalize_json(input, &options).unwrap();
        assert!(result.contains('\n')); // Pretty-printed
        assert!(result.ends_with('\n'));
    }

    // TOML canonicalization converts CRLF to LF when normalization is enabled.
    #[test]
    fn test_canonicalize_toml() {
        let input = "[package]\r\nname = \"test\"\r\n";
        let options = CanonicalizeOptions::new().with_normalize_line_endings(true);
        let result = canonicalize_toml(input, &options).unwrap();
        assert!(!result.contains("\r\n"));
        assert_eq!(result, "[package]\nname = \"test\"\n");
    }

    // One representative path per group: std, external crate, and `crate::`.
    #[test]
    fn test_import_group_detection() {
        assert_eq!(
            RustImport::parse("use std::collections::HashMap;")
                .unwrap()
                .group,
            ImportGroup::Std
        );
        assert_eq!(
            RustImport::parse("use serde::Serialize;").unwrap().group,
            ImportGroup::External
        );
        assert_eq!(
            RustImport::parse("use crate::module::Struct;")
                .unwrap()
                .group,
            ImportGroup::Internal
        );
    }

    // Relative `self::` imports are rejected by the parser (returns None).
    #[test]
    fn test_self_import_skipped() {
        // self imports should not be sorted
        let result = RustImport::parse("use self::Struct;");
        assert!(result.is_none());
    }

    // The quick preset sorts imports and leaves the output newline-terminated.
    #[test]
    fn test_canonicalize_quick() {
        let input = r#"use std::b::B;
use std::a::A;
"#;

        let result = canonicalize_rust_quick(input).unwrap();

        // std::a should come before std::b
        let a_pos = result.find("use std::a").unwrap();
        let b_pos = result.find("use std::b").unwrap();
        assert!(a_pos < b_pos);

        // Should end with newline
        assert!(result.ends_with('\n'));
    }
}