use crate::utils::error::{Error, Result};
use regex::Regex;
use std::collections::BTreeSet;
use std::path::Path;
/// Switches for the text-level clean-up passes.
///
/// Every flag defaults to `false`, so a default value performs no formatting.
#[derive(Clone, Debug, Default)]
pub struct FormattingFlags {
    /// Sort and group runs of `use` lines (only consulted by `canonicalize`).
    pub sort_imports: bool,
    /// Convert `\r\n` and lone `\r` line endings to `\n`.
    pub normalize_line_endings: bool,
    /// Strip whitespace at the end of every line.
    pub trim_trailing_whitespace: bool,
}
/// All boolean switches understood by the canonicalizer.
#[derive(Clone, Debug, Default)]
pub struct CanonicalizeFlags {
    /// Text-level formatting passes.
    pub formatting: FormattingFlags,
    /// Prepend a "generated file" header (only consulted by `canonicalize`).
    pub inject_header: bool,
}
/// Full configuration for a canonicalization run.
///
/// Build one with [`CanonicalizeOptions::new`] and the `with_*` methods.
#[derive(Clone, Debug, Default)]
pub struct CanonicalizeOptions {
    /// Which passes are enabled.
    pub flags: CanonicalizeFlags,
    /// Header text used in place of the default generated header when header
    /// injection is enabled; `None` selects the default.
    pub custom_header: Option<String>,
}
impl CanonicalizeOptions {
    /// Creates options with every flag disabled and no custom header.
    pub fn new() -> Self {
        Default::default()
    }

    /// Enables or disables sorting of `use` lines.
    pub fn with_sort_imports(self, enabled: bool) -> Self {
        let mut updated = self;
        updated.flags.formatting.sort_imports = enabled;
        updated
    }

    /// Enables or disables line-ending normalization.
    pub fn with_normalize_line_endings(self, enabled: bool) -> Self {
        let mut updated = self;
        updated.flags.formatting.normalize_line_endings = enabled;
        updated
    }

    /// Enables or disables header injection.
    pub fn with_inject_header(self, enabled: bool) -> Self {
        let mut updated = self;
        updated.flags.inject_header = enabled;
        updated
    }

    /// Sets the header text to inject instead of the default one.
    pub fn with_custom_header(self, header: String) -> Self {
        Self {
            custom_header: Some(header),
            ..self
        }
    }

    /// Enables or disables trimming of trailing whitespace.
    pub fn with_trim_trailing_whitespace(self, enabled: bool) -> Self {
        let mut updated = self;
        updated.flags.formatting.trim_trailing_whitespace = enabled;
        updated
    }
}
/// A single-line `use` statement recognised by [`RustImport::parse`].
///
/// Field order matters: the derived ordering compares fields top to bottom.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
struct RustImport {
    /// The whole statement with surrounding whitespace removed.
    full_line: String,
    /// The imported path: the text after `use `, without trailing `;`.
    path: String,
    /// Statement keyword; `parse` always stores `"use"`.
    import_type: String,
    /// Group the import is emitted in.
    group: ImportGroup,
}
/// Import groups, declared in the order they are emitted.
///
/// The derived ordering follows declaration order: `Std < External < Internal`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum ImportGroup {
    /// Paths starting with `std::`, `core::` or `alloc::`.
    Std,
    /// Any other path that contains `::`.
    External,
    /// Paths starting with `crate::`, and paths without any `::`.
    Internal,
}
impl RustImport {
    /// Parses one source line into an import.
    ///
    /// Returns `None` when the trimmed line does not start with `use `, or
    /// when the imported path starts with `self::` or `super::`.
    fn parse(line: &str) -> Option<Self> {
        let statement = line.trim();
        let path = statement
            .strip_prefix("use ")?
            .trim_end_matches(';')
            .trim();

        // Relative imports are never treated as sortable.
        let is_relative = ["self::", "super::"]
            .iter()
            .any(|prefix| path.starts_with(*prefix));
        if is_relative {
            return None;
        }

        let is_std = ["std::", "core::", "alloc::"]
            .iter()
            .any(|prefix| path.starts_with(*prefix));
        let group = if is_std {
            ImportGroup::Std
        } else if path.starts_with("crate::") || !path.contains("::") {
            // `crate::…` paths and single-segment paths share the internal group.
            ImportGroup::Internal
        } else {
            ImportGroup::External
        };

        Some(RustImport {
            full_line: statement.to_string(),
            path: path.to_string(),
            import_type: "use".to_string(),
            group,
        })
    }

    /// Key ordering imports by group first, then by path.
    fn sort_key(&self) -> (ImportGroup, String) {
        let path = self.path.clone();
        (self.group, path)
    }
}
pub fn canonicalize(content: &str, options: &CanonicalizeOptions) -> Result<String> {
let mut result = content.to_string();
if options.flags.formatting.normalize_line_endings {
result = normalize_line_endings(&result);
}
if options.flags.formatting.trim_trailing_whitespace {
result = trim_trailing_whitespace(&result);
}
if options.flags.formatting.sort_imports {
result = sort_rust_imports(&result)?;
}
if options.flags.inject_header {
let header = options
.custom_header
.clone()
.unwrap_or_else(default_generated_header);
result = inject_header(&result, &header);
}
if !result.ends_with('\n') {
result.push('\n');
}
Ok(result)
}
/// Converts every `\r\n` pair and every lone `\r` into a single `\n`.
fn normalize_line_endings(content: &str) -> String {
    let mut normalized = String::with_capacity(content.len());
    let mut chars = content.chars().peekable();
    while let Some(ch) = chars.next() {
        if ch != '\r' {
            normalized.push(ch);
            continue;
        }
        // A carriage return, alone or as the first half of CRLF, becomes one LF.
        normalized.push('\n');
        if chars.peek() == Some(&'\n') {
            chars.next();
        }
    }
    normalized
}
/// Removes trailing whitespace from every line of `content`.
///
/// Lines are re-joined with `\n`; a final newline is emitted only when the
/// input ended with one.
fn trim_trailing_whitespace(content: &str) -> String {
    let mut out = String::with_capacity(content.len());
    for (index, line) in content.lines().enumerate() {
        if index > 0 {
            out.push('\n');
        }
        out.push_str(line.trim_end());
    }
    // `lines()` drops the final terminator, so restore it when it was there.
    if content.ends_with('\n') {
        out.push('\n');
    }
    out
}
/// Sorts and groups runs of single-line `use` statements.
///
/// The input is processed line by line. A run starts at a line whose trimmed
/// text begins with `use ` and continues while that stays true. Within a run:
///
/// * lines rejected by `RustImport::parse` are copied through unchanged,
///   ahead of the sorted imports;
/// * accepted imports are de-duplicated and emitted in their trimmed form,
///   ordered by that text, in three groups (std, external, internal)
///   separated by one blank line.
///
/// A blank line directly after a run is consumed. One blank line is written
/// after the run whenever the next remaining line is non-blank. A `//` comment
/// ends the run and stays in the output; it is never consumed.
///
/// Every emitted line is terminated with `\n`. A `use` statement spanning
/// several lines is not recognised as one unit; only its first line is seen
/// as an import.
fn sort_rust_imports(content: &str) -> Result<String> {
    let lines: Vec<&str> = content.lines().collect();
    let mut result = String::new();
    let mut i = 0;
    while i < lines.len() {
        let line = lines[i];
        if line.trim().starts_with("use ") {
            // Collect the contiguous run of `use` lines starting here.
            let mut imports = Vec::new();
            while i < lines.len() {
                let trimmed = lines[i].trim();
                if trimmed.starts_with("use ") {
                    if let Some(import) = RustImport::parse(lines[i]) {
                        imports.push(import);
                    } else {
                        // Not sortable (e.g. `self::`/`super::`): keep as written.
                        result.push_str(lines[i]);
                        result.push('\n');
                    }
                    i += 1;
                } else {
                    // Only a blank separator is swallowed here. Anything else,
                    // comments included, is left for the outer loop to copy so
                    // that no source text is lost.
                    if trimmed.is_empty() {
                        i += 1;
                    }
                    break;
                }
            }

            imports.sort_by_key(|imp| imp.sort_key());

            // The sets de-duplicate and fix the final order within each group.
            let mut std_imports: BTreeSet<String> = BTreeSet::new();
            let mut external_imports: BTreeSet<String> = BTreeSet::new();
            let mut internal_imports: BTreeSet<String> = BTreeSet::new();
            for imp in &imports {
                let bucket = match imp.group {
                    ImportGroup::Std => &mut std_imports,
                    ImportGroup::External => &mut external_imports,
                    ImportGroup::Internal => &mut internal_imports,
                };
                bucket.insert(imp.full_line.clone());
            }

            for imp in &std_imports {
                result.push_str(imp);
                result.push('\n');
            }
            if !std_imports.is_empty() && !external_imports.is_empty() {
                result.push('\n');
            }
            for imp in &external_imports {
                result.push_str(imp);
                result.push('\n');
            }
            if !external_imports.is_empty() && !internal_imports.is_empty() {
                result.push('\n');
            }
            for imp in &internal_imports {
                result.push_str(imp);
                result.push('\n');
            }

            // Separate the import block from whatever follows it.
            if i < lines.len() && !lines[i].trim().is_empty() {
                result.push('\n');
            }
            continue;
        }
        result.push_str(line);
        result.push('\n');
        i += 1;
    }
    Ok(result)
}
/// Prepends `header` (followed by a newline) to `content`.
///
/// * If any of the first five lines contains `DO NOT EDIT` or `Generated by`,
///   the content is returned unchanged.
/// * If the content starts with a shebang line, that line stays first and the
///   header goes directly after it.
/// * A leading `#!` followed by `[` (optionally after whitespace) is a Rust
///   inner attribute such as `#![allow(...)]`, not a shebang, so the header
///   goes above it.
///
/// The text after the insertion point is copied verbatim, including its line
/// endings and any trailing newline.
fn inject_header(content: &str, header: &str) -> String {
    let has_header = content
        .lines()
        .take(5)
        .any(|line| line.contains("DO NOT EDIT") || line.contains("Generated by"));
    if has_header {
        return content.to_string();
    }

    let header = format!("{}\n", header);

    // `#!` opens a shebang only when the next non-whitespace character is not `[`.
    let is_shebang = content
        .strip_prefix("#!")
        .map_or(false, |after| !after.trim_start().starts_with('['));
    if !is_shebang {
        return format!("{}{}", header, content);
    }

    // Split once at the first newline so the rest of the file is kept as-is.
    match content.split_once('\n') {
        Some((shebang, rest)) => format!("{}\n{}{}", shebang, header, rest),
        // The content is just the shebang line, without a terminator.
        None => format!("{}\n{}", content, header),
    }
}
/// Builds the default "generated file" banner.
///
/// The text ends with `// Generated at: ` followed by the current UTC time in
/// RFC 3339 format, so the result differs from call to call. There is no
/// trailing newline.
fn default_generated_header() -> String {
    let generated_at = chrono::Utc::now().to_rfc3339();
    format!(
        "// DO NOT EDIT\n\
         // Generated by ggen (https://github.com/seanchatmangpt/ggen)\n\
         //\n\
         // Changes to this file will be overwritten when the code is regenerated.\n\
         // Please modify the source ontology (.ttl files) and templates instead.\n\
         //\n\
         // Generated at: {}",
        generated_at
    )
}
/// Returns the default generated-file header.
///
/// Public wrapper around `default_generated_header`; the text embeds the
/// current time, so successive calls return different strings.
pub fn get_generated_header() -> String {
default_generated_header()
}
pub fn canonicalize_file(
file_path: &Path, content: &str, options: &CanonicalizeOptions,
) -> Result<String> {
let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("");
match ext {
"rs" => canonicalize_rust(content, options),
"toml" => canonicalize_toml(content, options),
"json" => canonicalize_json(content, options),
"ttl" | "turtle" => canonicalize_ttl(content, options),
_ => canonicalize_generic(content, options),
}
}
/// Canonicalizes Rust source text.
///
/// Delegates directly to `canonicalize`, which applies every enabled pass.
fn canonicalize_rust(content: &str, options: &CanonicalizeOptions) -> Result<String> {
canonicalize(content, options)
}
/// Canonicalizes TOML text.
///
/// Applies line-ending normalization and trailing-whitespace trimming when
/// enabled, then guarantees a final newline. Import sorting and header
/// injection are not applied here.
fn canonicalize_toml(content: &str, options: &CanonicalizeOptions) -> Result<String> {
    let formatting = &options.flags.formatting;

    let normalized = if formatting.normalize_line_endings {
        normalize_line_endings(content)
    } else {
        content.to_string()
    };
    let mut output = if formatting.trim_trailing_whitespace {
        trim_trailing_whitespace(&normalized)
    } else {
        normalized
    };

    if !output.ends_with('\n') {
        output.push('\n');
    }
    Ok(output)
}
fn canonicalize_json(content: &str, _options: &CanonicalizeOptions) -> Result<String> {
let value: serde_json::Value =
serde_json::from_str(content).map_err(|e| Error::new(&format!("Invalid JSON: {}", e)))?;
let mut result = serde_json::to_string_pretty(&value)
.map_err(|e| Error::new(&format!("JSON serialization error: {}", e)))?;
if !result.ends_with('\n') {
result.push('\n');
}
Ok(result)
}
fn canonicalize_ttl(content: &str, options: &CanonicalizeOptions) -> Result<String> {
let mut result = content.to_string();
if options.flags.formatting.normalize_line_endings {
result = normalize_line_endings(&result);
}
if options.flags.formatting.trim_trailing_whitespace {
result = trim_trailing_whitespace(&result);
}
let re = Regex::new(r"\n{3,}").map_err(|e| Error::new(&format!("Regex error: {}", e)))?;
result = re.replace_all(&result, "\n\n").to_string();
if !result.ends_with('\n') {
result.push('\n');
}
Ok(result)
}
/// Fallback canonicalization for file types without a dedicated routine.
///
/// Applies line-ending normalization and trailing-whitespace trimming when
/// enabled, then guarantees a final newline.
fn canonicalize_generic(content: &str, options: &CanonicalizeOptions) -> Result<String> {
    let formatting = &options.flags.formatting;

    let normalized = if formatting.normalize_line_endings {
        normalize_line_endings(content)
    } else {
        content.to_string()
    };
    let mut output = if formatting.trim_trailing_whitespace {
        trim_trailing_whitespace(&normalized)
    } else {
        normalized
    };

    if !output.ends_with('\n') {
        output.push('\n');
    }
    Ok(output)
}
/// Canonicalizes Rust source with the formatting passes on and no header.
///
/// Enables line-ending normalization, trailing-whitespace trimming and import
/// sorting; header injection is disabled.
///
/// # Errors
///
/// Propagates any error from the underlying canonicalization.
pub fn canonicalize_rust_quick(content: &str) -> Result<String> {
    let formatting_only = CanonicalizeOptions::new()
        .with_inject_header(false)
        .with_normalize_line_endings(true)
        .with_trim_trailing_whitespace(true)
        .with_sort_imports(true);
    canonicalize_rust(content, &formatting_only)
}
/// Canonicalizes Rust source with every pass enabled, including the header.
///
/// Enables line-ending normalization, trailing-whitespace trimming, import
/// sorting and injection of the default generated header.
///
/// # Errors
///
/// Propagates any error from the underlying canonicalization.
pub fn canonicalize_a2a(content: &str) -> Result<String> {
    let everything = CanonicalizeOptions::new()
        .with_inject_header(true)
        .with_normalize_line_endings(true)
        .with_trim_trailing_whitespace(true)
        .with_sort_imports(true);
    canonicalize_rust(content, &everything)
}
// Unit tests for the canonicalization helpers defined in this file.
#[cfg(test)]
mod tests {
use super::*;
// CRLF and a lone trailing CR both become LF.
#[test]
fn test_normalize_line_endings() {
let input = "hello\r\nworld\r";
let result = normalize_line_endings(input);
assert_eq!(result, "hello\nworld\n");
}
// Trailing whitespace is removed; leading whitespace and the final newline stay.
#[test]
fn test_trim_trailing_whitespace() {
let input = "hello \nworld \n test \n";
let result = trim_trailing_whitespace(input);
assert_eq!(result, "hello\nworld\n test\n");
}
// The header is placed in front of content that has no marker yet.
#[test]
fn test_inject_header() {
let content = "fn main() {}\n";
let header = "// Generated header\n";
let result = inject_header(content, header);
assert!(result.starts_with("// Generated header\n"));
assert!(result.contains("fn main() {}\n"));
}
// Content that already carries a "DO NOT EDIT" marker is returned unchanged.
#[test]
fn test_inject_header_preserves_existing() {
let content = "// DO NOT EDIT this file\nfn main() {}\n";
let header = "// Generated header\n";
let result = inject_header(content, header);
assert_eq!(result, content);
}
// A std import parses to its path (without `use` and `;`) and the Std group.
#[test]
fn test_rust_import_parse() {
let line = "use std::collections::HashMap;";
let import = RustImport::parse(line);
assert!(import.is_some());
let imp = import.unwrap();
assert_eq!(imp.path, "std::collections::HashMap");
assert_eq!(imp.group, ImportGroup::Std);
}
// Group order in the output is std, then external, then internal;
// std imports are ordered among themselves.
#[test]
fn test_rust_import_sorting() {
let input = r#"
use crate::module::inner::Struct;
use std::collections::HashMap;
use external_crate::Trait;
use std::vec::Vec;
use crate::module::Type;
fn main() {}
"#;
let result = sort_rust_imports(input).unwrap();
let lines: Vec<&str> = result.lines().collect();
let std_idx = lines.iter().position(|l| l.contains("std::collections"));
let vec_idx = lines.iter().position(|l| l.contains("std::vec"));
let external_idx = lines.iter().position(|l| l.contains("external_crate"));
let internal_idx = lines.iter().position(|l| l.contains("crate::module"));
assert!(std_idx.unwrap() < external_idx.unwrap());
assert!(external_idx.unwrap() < internal_idx.unwrap());
assert!(std_idx.unwrap() < vec_idx.unwrap());
}
// `canonicalize` with import sorting enabled reorders the two std imports.
#[test]
fn test_canonicalize_rust() {
let input = r#"use std::b::B;
use std::a::A;
fn main() { println!("hello"); }"#;
let options = CanonicalizeOptions::new()
.with_sort_imports(true)
.with_normalize_line_endings(true);
let result = canonicalize(input, &options).unwrap();
let a_pos = result.find("use std::a").unwrap();
let b_pos = result.find("use std::b").unwrap();
assert!(a_pos < b_pos);
}
// The a2a preset injects the default header and sorts imports.
#[test]
fn test_canonicalize_a2a() {
let input = r#"use std::b::B;
use std::a::A;
fn main() {}
"#;
let result = canonicalize_a2a(input).unwrap();
assert!(result.contains("DO NOT EDIT"));
assert!(result.contains("Generated by ggen"));
let a_pos = result.find("use std::a").unwrap();
let b_pos = result.find("use std::b").unwrap();
assert!(a_pos < b_pos);
}
// Compact JSON comes back as multi-line text ending in a newline.
#[test]
fn test_canonicalize_json() {
let input = r#"{"a":1,"b":2}"#;
let options = CanonicalizeOptions::new();
let result = canonicalize_json(input, &options).unwrap();
assert!(result.contains('\n')); assert!(result.ends_with('\n'));
}
// CRLF line endings in TOML are normalized when the flag is set.
#[test]
fn test_canonicalize_toml() {
let input = "[package]\r\nname = \"test\"\r\n";
let options = CanonicalizeOptions::new().with_normalize_line_endings(true);
let result = canonicalize_toml(input, &options).unwrap();
assert!(!result.contains("\r\n"));
assert_eq!(result, "[package]\nname = \"test\"\n");
}
// One representative path for each import group.
#[test]
fn test_import_group_detection() {
assert_eq!(
RustImport::parse("use std::collections::HashMap;")
.unwrap()
.group,
ImportGroup::Std
);
assert_eq!(
RustImport::parse("use serde::Serialize;").unwrap().group,
ImportGroup::External
);
assert_eq!(
RustImport::parse("use crate::module::Struct;")
.unwrap()
.group,
ImportGroup::Internal
);
}
// `self::` imports are rejected by the parser.
#[test]
fn test_self_import_skipped() {
let result = RustImport::parse("use self::Struct;");
assert!(result.is_none());
}
// The quick preset sorts imports and ends the output with a newline.
#[test]
fn test_canonicalize_quick() {
let input = r#"use std::b::B;
use std::a::A;
"#;
let result = canonicalize_rust_quick(input).unwrap();
let a_pos = result.find("use std::a").unwrap();
let b_pos = result.find("use std::b").unwrap();
assert!(a_pos < b_pos);
assert!(result.ends_with('\n'));
}
}