1use crate::formatter::{dialect::*, logging::*, source_map::*, transforms::*, types::*};
4use std::borrow::Cow;
5
6#[derive(Debug, Clone)]
8pub struct NormalizationEngine {
9 ws_stack: Vec<WhitespaceContext>,
11
12 config: EngineConfig,
14}
15
/// Tunable switches for [`NormalizationEngine`].
#[derive(Debug, Clone)]
pub struct EngineConfig {
    /// When `false`, `NormalizationEngine::is_canonical` always answers `false`,
    /// so every input goes through the full line-by-line rewrite.
    pub enable_fast_path: bool,

    /// Upper bound on nesting depth.
    /// NOTE(review): not consulted by any code visible in this file.
    pub max_nesting_depth: usize,

    /// When `true`, any `#` in the input disables the fast path and the text
    /// from `#` to the end of the line is copied verbatim and recorded as a
    /// comment.
    pub preserve_comments: bool,

    /// Whether proofs should be generated.
    /// NOTE(review): not consulted by any code visible in this file.
    pub generate_proofs: bool,
}
30
31impl Default for EngineConfig {
32 fn default() -> Self {
33 Self {
34 enable_fast_path: true,
35 max_nesting_depth: 256,
36 preserve_comments: true,
37 generate_proofs: false,
38 }
39 }
40}
41
42impl NormalizationEngine {
43 pub fn new() -> Self {
44 Self {
45 ws_stack: vec![WhitespaceContext::Command],
46 config: EngineConfig::default(),
47 }
48 }
49
50 pub fn with_config(config: EngineConfig) -> Self {
51 Self {
52 ws_stack: vec![WhitespaceContext::Command],
53 config,
54 }
55 }
56
57 pub fn is_canonical(&self, input: &[u8]) -> bool {
59 if !self.config.enable_fast_path {
60 return false;
61 }
62
63 let input_str = match std::str::from_utf8(input) {
65 Ok(s) => s,
66 Err(_) => return false,
67 };
68
69 if self.config.preserve_comments && input_str.contains('#') {
71 return false;
72 }
73
74 if input_str.contains(" ") || input_str.contains("\t") || input_str.contains("\r") || input_str.starts_with(' ') || input_str.ends_with(' ')
80 {
81 return false;
83 }
84
85 if input_str.contains("$") && !self.has_proper_quoting(input_str) {
87 return false;
88 }
89
90 true
91 }
92
93 pub fn normalize<'a>(
95 &mut self,
96 input: &'a [u8],
97 dialect: ShellDialect,
98 config: FormatConfig,
99 ) -> crate::Result<FormattedSource<'a>> {
100 let input_str = std::str::from_utf8(input)
101 .map_err(|e| crate::Error::Internal(format!("Invalid UTF-8: {e}")))?;
102
103 if self.is_canonical(input) {
105 return Ok(FormattedSource {
106 text: Cow::Borrowed(input_str),
107 source_map: SourceMap::identity(input.len()),
108 metadata: SemanticMetadata::default(),
109 canonical_hash: blake3::hash(input).into(),
110 transforms: TransformLog::new(),
111 });
112 }
113
114 let mut output = String::with_capacity(input.len() + input.len() / 4);
116 let mut source_map = SourceMapBuilder::new();
117 let mut transform_log = TransformLog::new();
118 let mut metadata = SemanticMetadata::default();
119
120 let mut line_number = 1;
122 let mut char_pos = 0;
123
124 for line in input_str.lines() {
125 let _line_start = char_pos;
126 let formatted_line = self.normalize_line(
127 line,
128 dialect.clone(),
129 &config,
130 &mut source_map,
131 &mut transform_log,
132 &mut metadata,
133 line_number,
134 char_pos,
135 )?;
136
137 output.push_str(&formatted_line);
138 if line_number < input_str.lines().count() {
139 output.push('\n');
140 }
141
142 char_pos += line.len() + 1; line_number += 1;
144 }
145
146 let canonical_hash = blake3::hash(output.as_bytes()).into();
147
148 Ok(FormattedSource {
149 text: Cow::Owned(output),
150 source_map: source_map.build(),
151 metadata,
152 canonical_hash,
153 transforms: transform_log,
154 })
155 }
156
157 #[allow(clippy::too_many_arguments)]
158 fn normalize_line(
159 &mut self,
160 line: &str,
161 dialect: ShellDialect,
162 config: &FormatConfig,
163 source_map: &mut SourceMapBuilder,
164 transform_log: &mut TransformLog,
165 metadata: &mut SemanticMetadata,
166 line_number: usize,
167 line_start: usize,
168 ) -> crate::Result<String> {
169 let mut output = String::with_capacity(line.len());
170 let mut chars = line.char_indices().peekable();
171
172 while let Some((pos, ch)) = chars.next() {
173 let absolute_pos = line_start + pos;
174
175 match ch {
176 ' ' | '\t' => {
178 self.normalize_whitespace(
179 &mut chars,
180 &mut output,
181 source_map,
182 transform_log,
183 absolute_pos,
184 )?;
185 }
186
187 '#' => {
189 if config.preserve_whitespace || self.config.preserve_comments {
190 let comment = self.extract_comment(&mut chars, pos, line)?;
191 output.push_str(&comment);
192
193 metadata.comments.push(CommentMetadata {
194 content: comment.clone(),
195 start_pos: absolute_pos,
196 end_pos: absolute_pos + comment.len(),
197 line: line_number,
198 column: pos,
199 });
200 } else {
201 output.push(ch);
203 }
204 }
205
206 '$' => {
208 self.normalize_expansion(
209 &mut chars,
210 &mut output,
211 source_map,
212 transform_log,
213 absolute_pos,
214 dialect.clone(),
215 )?;
216 }
217
218 '\'' | '"' => {
220 self.normalize_quoted_string(
221 ch,
222 &mut chars,
223 &mut output,
224 source_map,
225 absolute_pos,
226 )?;
227 }
228
229 _ => {
231 output.push(ch);
232 source_map.add_char_mapping(
233 CharPos(absolute_pos),
234 CharPos(line_start + output.len() - 1),
235 );
236 }
237 }
238 }
239
240 Ok(output)
241 }
242
243 fn normalize_whitespace(
244 &mut self,
245 chars: &mut std::iter::Peekable<std::str::CharIndices>,
246 output: &mut String,
247 source_map: &mut SourceMapBuilder,
248 transform_log: &mut TransformLog,
249 start_pos: usize,
250 ) -> crate::Result<()> {
251 let context = self
252 .ws_stack
253 .last()
254 .copied()
255 .unwrap_or(WhitespaceContext::Command);
256
257 let mut whitespace_chars = 1; while let Some((_, ch)) = chars.peek() {
260 if ch.is_whitespace() && *ch != '\n' {
261 chars.next();
262 whitespace_chars += 1;
263 } else {
264 break;
265 }
266 }
267
268 let normalized = match context {
270 WhitespaceContext::Command => " ", WhitespaceContext::Arithmetic => "", WhitespaceContext::QuotedString { .. } => {
273 return Ok(()); }
276 _ => " ", };
278
279 if whitespace_chars > 1 || (!normalized.is_empty() && whitespace_chars == 0) {
280 let transform = Transform::WhitespaceNormalize {
282 context,
283 preserved: IntervalSet::new(),
284 };
285
286 transform_log.add_entry(TransformEntry {
287 id: TransformId::new(),
288 transform,
289 source_span: Span::new(BytePos(start_pos), BytePos(start_pos + whitespace_chars)),
290 result_span: Span::new(
291 BytePos(output.len()),
292 BytePos(output.len() + normalized.len()),
293 ),
294 timestamp: std::time::Instant::now(),
295 proof: None,
296 semantic_delta: None,
297 });
298 }
299
300 output.push_str(normalized);
301
302 source_map.add_range_mapping(
304 CharPos(start_pos),
305 CharPos(start_pos + whitespace_chars),
306 CharPos(output.len() - normalized.len()),
307 CharPos(output.len()),
308 );
309
310 Ok(())
311 }
312
313 fn normalize_expansion(
314 &mut self,
315 chars: &mut std::iter::Peekable<std::str::CharIndices>,
316 output: &mut String,
317 _source_map: &mut SourceMapBuilder,
318 transform_log: &mut TransformLog,
319 start_pos: usize,
320 _dialect: ShellDialect,
321 ) -> crate::Result<()> {
322 let context = self
324 .ws_stack
325 .last()
326 .copied()
327 .unwrap_or(WhitespaceContext::Command);
328
329 let needs_quotes = matches!(context, WhitespaceContext::Command);
330
331 if let Some((_, '{')) = chars.peek() {
332 output.push('$');
334 output.push('{');
335 chars.next();
336
337 for (_, ch) in chars.by_ref() {
338 output.push(ch);
339 if ch == '}' {
340 break;
341 }
342 }
343 } else {
344 let var_start = output.len();
346 let mut var_name = String::new();
347
348 while let Some((_, ch)) = chars.peek() {
349 if ch.is_alphanumeric() || *ch == '_' {
350 var_name.push(*ch);
351 chars.next();
352 } else {
353 break;
354 }
355 }
356
357 if needs_quotes && !var_name.is_empty() {
358 output.push('"');
359 output.push('$');
360 output.push_str(&var_name);
361 output.push('"');
362
363 let transform = Transform::QuoteExpansion {
365 kind: QuoteKind::Double,
366 reason: QuoteReason::WordSplitting,
367 proof: SexprProof::new(format!(
368 "(= (word-split ${var_name}) (word-split \"${var_name}\"))"
369 )),
370 };
371
372 transform_log.add_entry(TransformEntry {
373 id: TransformId::new(),
374 transform,
375 source_span: Span::new(
376 BytePos(start_pos),
377 BytePos(start_pos + 1 + var_name.len()),
378 ),
379 result_span: Span::new(BytePos(var_start), BytePos(output.len())),
380 timestamp: std::time::Instant::now(),
381 proof: None,
382 semantic_delta: None,
383 });
384 } else {
385 output.push('$');
386 output.push_str(&var_name);
387 }
388 }
389
390 Ok(())
391 }
392
393 fn normalize_quoted_string(
394 &mut self,
395 quote_char: char,
396 chars: &mut std::iter::Peekable<std::str::CharIndices>,
397 output: &mut String,
398 _source_map: &mut SourceMapBuilder,
399 _start_pos: usize,
400 ) -> crate::Result<()> {
401 output.push(quote_char);
402
403 let quote_type = match quote_char {
405 '\'' => QuoteType::Single,
406 '"' => QuoteType::Double,
407 _ => QuoteType::Double,
408 };
409
410 self.ws_stack
411 .push(WhitespaceContext::QuotedString { quote_type });
412
413 while let Some((_, ch)) = chars.next() {
415 output.push(ch);
416
417 if ch == quote_char {
418 break;
419 }
420
421 if ch == '\\' {
423 if let Some((_, escaped)) = chars.next() {
424 output.push(escaped);
425 }
426 }
427 }
428
429 self.ws_stack.pop();
431
432 Ok(())
433 }
434
435 fn extract_comment(
436 &self,
437 chars: &mut std::iter::Peekable<std::str::CharIndices>,
438 start_pos: usize,
439 line: &str,
440 ) -> crate::Result<String> {
441 let comment = line[start_pos..].to_string();
443
444 while chars.next().is_some() {}
446
447 Ok(comment)
448 }
449
450 fn has_proper_quoting(&self, input: &str) -> bool {
451 let mut in_quotes = false;
454 let mut quote_char = '\0';
455 let chars = input.chars();
456
457 for ch in chars {
458 match ch {
459 '\'' | '"' if !in_quotes => {
460 in_quotes = true;
461 quote_char = ch;
462 }
463 c if in_quotes && c == quote_char => {
464 in_quotes = false;
465 quote_char = '\0';
466 }
467 '$' if !in_quotes => {
468 return false;
470 }
471 _ => {}
472 }
473 }
474
475 true
476 }
477}
478
479impl Default for NormalizationEngine {
480 fn default() -> Self {
481 Self::new()
482 }
483}
484
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh engine starts with exactly one `Command` context.
    #[test]
    fn test_engine_creation() {
        let engine = NormalizationEngine::new();
        assert_eq!(engine.ws_stack.len(), 1);
        assert!(matches!(engine.ws_stack[0], WhitespaceContext::Command));
    }

    /// A custom configuration is stored as given.
    #[test]
    fn test_engine_with_config() {
        let config = EngineConfig {
            enable_fast_path: false,
            max_nesting_depth: 512,
            preserve_comments: false,
            generate_proofs: true,
        };

        let engine = NormalizationEngine::with_config(config);
        assert!(!engine.config.enable_fast_path);
        assert_eq!(engine.config.max_nesting_depth, 512);
    }

    /// Single spaces are canonical; runs, leading/trailing spaces and tabs are not.
    #[test]
    fn test_is_canonical_simple() {
        let engine = NormalizationEngine::new();

        assert!(engine.is_canonical(b"echo hello"));
        assert!(!engine.is_canonical(b"echo  hello")); // run of spaces
        assert!(!engine.is_canonical(b" echo hello")); // leading space
        assert!(!engine.is_canonical(b"echo hello ")); // trailing space
        assert!(!engine.is_canonical(b"echo\thello")); // tab
    }

    /// An unquoted expansion forces the slow path; a quoted one does not.
    #[test]
    fn test_is_canonical_quoting() {
        let engine = NormalizationEngine::new();

        assert!(engine.is_canonical(b"echo \"$var\""));
        assert!(!engine.is_canonical(b"echo $var"));
    }

    /// Already-normalized input comes back unchanged.
    #[test]
    fn test_normalize_identity() {
        let mut engine = NormalizationEngine::new();
        let input = b"echo hello";
        let config = FormatConfig::default();

        let result = engine.normalize(input, ShellDialect::Posix, config);
        assert!(result.is_ok());

        let formatted = result.unwrap();
        assert_eq!(formatted.text.as_ref(), "echo hello");
    }

    /// Runs of spaces collapse to one space and the change is logged.
    #[test]
    fn test_normalize_whitespace() {
        let mut engine = NormalizationEngine::new();
        // The input must contain redundant whitespace, otherwise nothing is
        // rewritten and the transform log stays empty.
        let input = b"echo  hello   world";
        let config = FormatConfig::default();

        let result = engine.normalize(input, ShellDialect::Posix, config);
        assert!(result.is_ok());

        let formatted = result.unwrap();
        assert_eq!(formatted.text.as_ref(), "echo hello world");
        assert!(!formatted.transforms.entries.is_empty());
    }

    /// A bare `$var` in command position gets double-quoted and logged.
    #[test]
    fn test_normalize_variable_quoting() {
        let mut engine = NormalizationEngine::new();
        let input = b"echo $var";
        let config = FormatConfig::default();

        let result = engine.normalize(input, ShellDialect::Posix, config);
        assert!(result.is_ok());

        let formatted = result.unwrap();
        assert_eq!(formatted.text.as_ref(), "echo \"$var\"");

        let has_quote_transform = formatted
            .transforms
            .entries
            .iter()
            .any(|entry| matches!(entry.transform, Transform::QuoteExpansion { .. }));
        assert!(has_quote_transform);
    }

    /// Whitespace inside a quoted string is left alone.
    #[test]
    fn test_normalize_quoted_strings() {
        let mut engine = NormalizationEngine::new();
        // The double space inside the quotes forces the slow path and must
        // survive it unchanged.
        let input = b"echo 'hello  world'";
        let config = FormatConfig::default();

        let result = engine.normalize(input, ShellDialect::Posix, config);
        assert!(result.is_ok());

        let formatted = result.unwrap();
        assert_eq!(formatted.text.as_ref(), "echo 'hello  world'");
    }

    /// A trailing comment is kept verbatim and recorded in the metadata.
    #[test]
    fn test_normalize_comments() {
        let mut engine = NormalizationEngine::new();
        let input = b"echo hello # this is a comment";
        let config = FormatConfig::default();

        let result = engine.normalize(input, ShellDialect::Posix, config);
        assert!(result.is_ok());

        let formatted = result.unwrap();
        assert_eq!(formatted.text.as_ref(), "echo hello # this is a comment");
        assert_eq!(formatted.metadata.comments.len(), 1);
        assert_eq!(
            formatted.metadata.comments[0].content,
            "# this is a comment"
        );
    }

    /// Line breaks between lines are preserved.
    #[test]
    fn test_normalize_multiline() {
        let mut engine = NormalizationEngine::new();
        let input = b"echo hello\necho world";
        let config = FormatConfig::default();

        let result = engine.normalize(input, ShellDialect::Posix, config);
        assert!(result.is_ok());

        let formatted = result.unwrap();
        assert_eq!(formatted.text.as_ref(), "echo hello\necho world");
    }

    /// Quoted expansions pass; an unquoted one fails; no expansion passes.
    #[test]
    fn test_has_proper_quoting() {
        let engine = NormalizationEngine::new();

        assert!(engine.has_proper_quoting("echo \"$var\""));
        assert!(engine.has_proper_quoting("echo '$var'"));
        assert!(!engine.has_proper_quoting("echo $var"));
        assert!(engine.has_proper_quoting("echo hello"));
    }

    /// Disabling the fast path makes every input non-canonical, and
    /// normalization still succeeds with comment preservation turned off.
    #[test]
    fn test_config_effects() {
        let config = EngineConfig {
            enable_fast_path: false,
            preserve_comments: false,
            ..Default::default()
        };

        let mut engine = NormalizationEngine::with_config(config);

        assert!(!engine.is_canonical(b"echo hello"));

        let input = b"echo hello # comment";
        let format_config = FormatConfig::default();

        let result = engine.normalize(input, ShellDialect::Posix, format_config);
        assert!(result.is_ok());
    }
}