1use anyhow::{Context, Result};
2use blake3::Hasher;
3use serde::Serialize;
4
5use crate::error::XCheckerError;
6use crate::types::FileType;
7
8pub fn emit_jcs<T: Serialize>(value: &T) -> Result<String> {
31 let json_value =
32 serde_json::to_value(value).with_context(|| "Failed to serialize value to JSON")?;
33 let json_bytes = serde_json_canonicalizer::to_vec(&json_value)
34 .with_context(|| "Failed to canonicalize JSON using JCS")?;
35 String::from_utf8(json_bytes).with_context(|| "JCS output contained invalid UTF-8")
36}
37
/// Version tag of the YAML canonicalization rules.
#[allow(dead_code)] // Not referenced by non-test code in this file.
pub const CANON_VERSION_YAML: &str = "yaml-v1";

/// Version tag of the Markdown normalization rules.
#[allow(dead_code)] // Not referenced by non-test code in this file.
pub const CANON_VERSION_MD: &str = "md-v1";

/// Combined version string: the YAML and Markdown tags joined by a comma.
pub const CANON_VERSION: &str = "yaml-v1,md-v1";

/// Identifier of the JSON canonicalization backend (JCS, RFC 8785).
pub const CANONICALIZATION_BACKEND: &str = "jcs-rfc8785";

/// Normalizes YAML, Markdown and plain-text content and computes hashes of
/// the normalized form.
#[derive(Debug, Clone)]
pub struct Canonicalizer {
    /// Canonicalization version string reported by this instance.
    version: String,
}
51
52impl Canonicalizer {
53 #[must_use]
55 pub fn new() -> Self {
56 Self {
57 version: CANON_VERSION.to_string(),
58 }
59 }
60
61 #[must_use]
63 pub fn version(&self) -> &str {
64 &self.version
65 }
66
67 #[must_use]
69 pub const fn backend(&self) -> &'static str {
70 CANONICALIZATION_BACKEND
71 }
72
73 #[allow(dead_code)]
81 pub fn canonicalize_yaml(&self, content: &str) -> Result<String> {
82 let yaml_value: serde_yaml::Value =
84 serde_yaml::from_str(content).with_context(|| "Failed to parse YAML content")?;
85
86 let mut output = serde_yaml::to_string(&yaml_value)
88 .with_context(|| "Failed to serialize YAML content")?;
89
90 output = self.normalize_line_endings(&output);
92 if !output.ends_with('\n') {
93 output.push('\n');
94 }
95
96 let lines: Vec<&str> = output.lines().collect();
98 let cleaned_lines: Vec<String> = lines
99 .iter()
100 .map(|line| line.trim_end().to_string())
101 .collect();
102
103 Ok(cleaned_lines.join("\n") + "\n")
104 }
105
106 pub fn normalize_markdown(&self, content: &str) -> Result<String> {
114 let mut normalized = self.normalize_line_endings(content);
115
116 let lines: Vec<&str> = normalized.lines().collect();
118 let mut cleaned_lines: Vec<String> = lines
119 .iter()
120 .map(|line| line.trim_end().to_string())
121 .collect();
122
123 for line in &mut cleaned_lines {
125 if line.starts_with("~~~") {
126 let lang_tag = line.trim_start_matches('~').trim();
128 if lang_tag.is_empty() {
129 *line = "```".to_string();
130 } else {
131 *line = format!("```{lang_tag}");
132 }
133 }
134 }
135
136 normalized = cleaned_lines.join("\n");
137
138 while normalized.ends_with("\n\n\n") {
140 normalized = normalized.trim_end_matches('\n').to_string() + "\n\n";
141 }
142
143 normalized = normalized.trim_end_matches('\n').to_string() + "\n";
145
146 Ok(normalized)
147 }
148
149 #[must_use]
151 pub fn normalize_text(&self, content: &str) -> String {
152 self.normalize_line_endings(content)
153 }
154
155 pub fn hash_canonicalized(&self, content: &str, file_type: FileType) -> Result<String> {
160 let hash_input = match file_type {
161 FileType::Yaml => {
162 let yaml_value: serde_yaml::Value = serde_yaml::from_str(content)
164 .with_context(|| "Failed to parse YAML content for hashing")?;
165
166 let json_value: serde_json::Value =
168 serde_yaml::from_str(&serde_yaml::to_string(&yaml_value)?)
169 .with_context(|| "Failed to convert YAML to JSON for hashing")?;
170
171 serde_json_canonicalizer::to_vec(&json_value)
173 .map(|bytes| String::from_utf8(bytes).unwrap())
174 .with_context(|| "Failed to canonicalize JSON using JCS")?
175 }
176 FileType::Markdown => self.normalize_markdown(content)?,
177 FileType::Text => self.normalize_text(content),
178 };
179
180 let mut hasher = Hasher::new();
181 hasher.update(hash_input.as_bytes());
182 Ok(hasher.finalize().to_hex().to_string())
183 }
184
185 pub fn hash_canonicalized_with_context(
187 &self,
188 content: &str,
189 file_type: FileType,
190 phase: &str,
191 ) -> Result<String, XCheckerError> {
192 self.hash_canonicalized(content, file_type).map_err(|e| {
193 XCheckerError::CanonicalizationFailed {
194 phase: phase.to_string(),
195 reason: e.to_string(),
196 }
197 })
198 }
199
200 fn normalize_line_endings(&self, content: &str) -> String {
202 content.replace("\r\n", "\n").replace('\r', "\n")
203 }
204}
205
/// The default canonicalizer is the one produced by [`Canonicalizer::new`].
impl Default for Canonicalizer {
    /// Delegates to [`Canonicalizer::new`].
    fn default() -> Self {
        Self::new()
    }
}
211
#[cfg(test)]
mod tests {
    // Unit tests for `Canonicalizer`.
    //
    // Many fixtures below are whitespace-sensitive (trailing spaces, blank
    // lines, `\r` characters); keep string contents exactly as written.
    use super::*;

    // `canonicalize_yaml` succeeds on well-formed YAML and yields output that
    // ends with `\n` and contains no `\r`, for two key orderings.
    #[test]
    fn test_yaml_canonicalization() {
        let canonicalizer = Canonicalizer::new();

        let yaml_content = r"
name: test
version: 1.0
dependencies:
 - dep1
 - dep2
config:
 debug: true
 port: 8080
";

        let result = canonicalizer.canonicalize_yaml(yaml_content);
        assert!(result.is_ok());

        let canonicalized = result.unwrap();
        assert!(canonicalized.ends_with('\n'));
        assert!(!canonicalized.contains('\r'));

        let reordered_yaml = r"
version: 1.0
name: test
config:
 port: 8080
 debug: true
dependencies:
 - dep1
 - dep2
";

        let reordered_result = canonicalizer.canonicalize_yaml(reordered_yaml);
        assert!(reordered_result.is_ok());

        let reordered_canonicalized = reordered_result.unwrap();
        assert!(reordered_canonicalized.ends_with('\n'));
        assert!(!reordered_canonicalized.contains('\r'));
    }

    // CRLF input with trailing spaces and extra blank lines normalizes to
    // LF-only text with a single trailing newline.
    #[test]
    fn test_markdown_normalization() {
        let canonicalizer = Canonicalizer::new();

        let markdown_content =
            "# Title\r\n\r\nSome content with trailing spaces \r\n\r\n\r\n\r\n";
        let result = canonicalizer.normalize_markdown(markdown_content);
        assert!(result.is_ok());

        let normalized = result.unwrap();
        assert_eq!(normalized, "# Title\n\nSome content with trailing spaces\n");
        assert!(!normalized.contains('\r'));
        assert!(!normalized.contains(" \n"));
        assert!(!normalized.ends_with("\n\n\n"));
    }

    // `normalize_text` converts both `\r\n` and lone `\r` to `\n`.
    #[test]
    fn test_text_normalization() {
        let canonicalizer = Canonicalizer::new();

        let text_content = "line1\r\nline2\rline3\n";
        let normalized = canonicalizer.normalize_text(text_content);

        assert_eq!(normalized, "line1\nline2\nline3\n");
        assert!(!normalized.contains('\r'));
    }

    // Hashing the same text twice gives the same digest, and CRLF vs LF line
    // endings do not change it.
    #[test]
    fn test_hash_consistency() {
        let canonicalizer = Canonicalizer::new();

        let content = "test content\nwith newlines";
        let hash1 = canonicalizer
            .hash_canonicalized(content, FileType::Text)
            .unwrap();
        let hash2 = canonicalizer
            .hash_canonicalized(content, FileType::Text)
            .unwrap();

        assert_eq!(hash1, hash2);

        let content_crlf = "test content\r\nwith newlines";
        let hash3 = canonicalizer
            .hash_canonicalized(content_crlf, FileType::Text)
            .unwrap();
        assert_eq!(hash1, hash3);
    }

    // YAML documents that differ only in top-level key order hash identically.
    #[test]
    fn test_yaml_hash_determinism() {
        let canonicalizer = Canonicalizer::new();

        let yaml1 = r"
name: test
version: 1.0
";

        let yaml2 = r"
version: 1.0
name: test
";

        let hash1 = canonicalizer
            .hash_canonicalized(yaml1, FileType::Yaml)
            .unwrap();
        let hash2 = canonicalizer
            .hash_canonicalized(yaml2, FileType::Yaml)
            .unwrap();

        assert_eq!(hash1, hash2);
        assert!(!hash1.is_empty());
        assert!(!hash2.is_empty());
    }

    // Markdown that differs only in line endings, trailing spaces and
    // trailing blank lines hashes identically.
    #[test]
    fn test_markdown_hash_determinism() {
        let canonicalizer = Canonicalizer::new();

        let md1 = "# Title\n\nContent with trailing spaces \n\n\n";
        let md2 = "# Title\r\n\r\nContent with trailing spaces\r\n";

        let hash1 = canonicalizer
            .hash_canonicalized(md1, FileType::Markdown)
            .unwrap();
        let hash2 = canonicalizer
            .hash_canonicalized(md2, FileType::Markdown)
            .unwrap();

        assert_eq!(hash1, hash2);
    }

    // Unparseable YAML makes `canonicalize_yaml` return an error.
    #[test]
    fn test_invalid_yaml() {
        let canonicalizer = Canonicalizer::new();

        let invalid_yaml = "invalid: yaml: content: [unclosed";
        let result = canonicalizer.canonicalize_yaml(invalid_yaml);

        assert!(result.is_err());
    }

    // `version()` reports the combined version string.
    #[test]
    fn test_version_string() {
        let canonicalizer = Canonicalizer::new();
        assert_eq!(canonicalizer.version(), "yaml-v1,md-v1");
    }

    // `backend()` reports the JCS backend identifier.
    #[test]
    fn test_backend_string() {
        let canonicalizer = Canonicalizer::new();
        assert_eq!(canonicalizer.backend(), "jcs-rfc8785");
    }

    // Tilde fences (`~~~lang` / `~~~`) are rewritten to backtick fences.
    #[test]
    fn test_markdown_fence_normalization() {
        let canonicalizer = Canonicalizer::new();

        let markdown_with_tildes = r#"# Title

Some content

~~~rust
fn main() {
 println!("Hello");
}
~~~

More content
"#;

        let result = canonicalizer.normalize_markdown(markdown_with_tildes);
        assert!(result.is_ok());

        let normalized = result.unwrap();
        assert!(normalized.contains("```rust"));
        assert!(!normalized.contains("~~~"));
        assert!(normalized.ends_with('\n'));
        assert!(!normalized.ends_with("\n\n"));
    }

    // Reordering keys at several levels of a YAML document does not change
    // the hash.
    #[test]
    fn test_yaml_jcs_canonicalization() {
        let canonicalizer = Canonicalizer::new();

        let yaml1 = r#"
config:
 database:
 host: localhost
 port: 5432
 cache:
 enabled: true
 ttl: 300
name: test
version: "1.0"
"#;

        let yaml2 = r#"
version: "1.0"
name: test
config:
 cache:
 ttl: 300
 enabled: true
 database:
 port: 5432
 host: localhost
"#;

        let hash1 = canonicalizer
            .hash_canonicalized(yaml1, FileType::Yaml)
            .unwrap();
        let hash2 = canonicalizer
            .hash_canonicalized(yaml2, FileType::Yaml)
            .unwrap();

        assert_eq!(hash1, hash2);
    }

    // Pins the exact values of the public version/backend constants.
    #[test]
    fn test_canonicalization_constants() {
        assert_eq!(CANON_VERSION_YAML, "yaml-v1");
        assert_eq!(CANON_VERSION_MD, "md-v1");
        assert_eq!(CANON_VERSION, "yaml-v1,md-v1");
        assert_eq!(CANONICALIZATION_BACKEND, "jcs-rfc8785");
    }

    // Larger YAML fixtures: key reordering, and whitespace / line-ending
    // differences, must not affect the hash.
    #[test]
    fn test_yaml_reordered_fixtures() {
        let canonicalizer = Canonicalizer::new();

        let yaml_fixture_1a = r#"
metadata:
 name: "test-project"
 version: "1.0.0"
 authors:
 - "Alice"
 - "Bob"
dependencies:
 runtime:
 serde: "1.0"
 tokio: "1.0"
 dev:
 criterion: "0.4"
config:
 database:
 host: "localhost"
 port: 5432
 ssl: true
 logging:
 level: "info"
 format: "json"
"#;

        let yaml_fixture_1b = r#"
config:
 logging:
 format: "json"
 level: "info"
 database:
 ssl: true
 port: 5432
 host: "localhost"
dependencies:
 dev:
 criterion: "0.4"
 runtime:
 tokio: "1.0"
 serde: "1.0"
metadata:
 authors:
 - "Alice"
 - "Bob"
 version: "1.0.0"
 name: "test-project"
"#;

        let hash_1a = canonicalizer
            .hash_canonicalized(yaml_fixture_1a, FileType::Yaml)
            .unwrap();
        let hash_1b = canonicalizer
            .hash_canonicalized(yaml_fixture_1b, FileType::Yaml)
            .unwrap();

        assert_eq!(
            hash_1a, hash_1b,
            "Reordered YAML should produce identical hashes"
        );

        let yaml_fixture_2a = "name: test\nversion: 1.0\ndebug: true";
        let yaml_fixture_2b = "name: test \r\nversion: 1.0 \r\ndebug: true \r\n";

        let hash_2a = canonicalizer
            .hash_canonicalized(yaml_fixture_2a, FileType::Yaml)
            .unwrap();
        let hash_2b = canonicalizer
            .hash_canonicalized(yaml_fixture_2b, FileType::Yaml)
            .unwrap();

        assert_eq!(
            hash_2a, hash_2b,
            "Different whitespace should produce identical hashes"
        );
    }

    // Markdown fixtures that differ in fence style, trailing blank lines and
    // line endings hash identically.
    #[test]
    fn test_markdown_reordered_fixtures() {
        let canonicalizer = Canonicalizer::new();

        let md_fixture_1a = r#"# Project Title

## Overview

This is a test project.

```rust
fn main() {
 println!("Hello");
}
```

## Features

- Feature 1
- Feature 2

"#;

        let md_fixture_1b = r#"# Project Title

## Overview

This is a test project.

~~~rust
fn main() {
 println!("Hello");
}
~~~

## Features

- Feature 1
- Feature 2



"#;

        let hash_1a = canonicalizer
            .hash_canonicalized(md_fixture_1a, FileType::Markdown)
            .unwrap();
        let hash_1b = canonicalizer
            .hash_canonicalized(md_fixture_1b, FileType::Markdown)
            .unwrap();

        assert_eq!(
            hash_1a, hash_1b,
            "Different markdown formatting should produce identical hashes"
        );

        let md_fixture_2a = "# Title\n\nContent\n";
        let md_fixture_2b = "# Title\r\n\r\nContent\r\n\r\n\r\n";

        let hash_2a = canonicalizer
            .hash_canonicalized(md_fixture_2a, FileType::Markdown)
            .unwrap();
        let hash_2b = canonicalizer
            .hash_canonicalized(md_fixture_2b, FileType::Markdown)
            .unwrap();

        assert_eq!(
            hash_2a, hash_2b,
            "Different line endings should produce identical hashes"
        );
    }

    // YAML comments and spacing do not affect the parsed structure or the
    // resulting hash.
    #[test]
    fn test_structure_determinism_independent_of_formatting() {
        let canonicalizer = Canonicalizer::new();

        let yaml_minimal = "a: 1\nb: 2";
        let yaml_verbose = r"
# Comment
a: 1 # inline comment
# Another comment
b: 2 # another inline comment
";

        let parsed_minimal: serde_yaml::Value = serde_yaml::from_str(yaml_minimal).unwrap();
        let parsed_verbose: serde_yaml::Value = serde_yaml::from_str(yaml_verbose).unwrap();

        assert_eq!(
            parsed_minimal, parsed_verbose,
            "Parsed structures should be identical"
        );

        let hash_minimal = canonicalizer
            .hash_canonicalized(yaml_minimal, FileType::Yaml)
            .unwrap();
        let hash_verbose = canonicalizer
            .hash_canonicalized(yaml_verbose, FileType::Yaml)
            .unwrap();

        assert_eq!(
            hash_minimal, hash_verbose,
            "Structure determinism should be independent of formatting"
        );
    }

    // Malformed YAML is rejected by both the hashing and the canonicalizing
    // entry points, while Markdown normalization accepts any text.
    #[test]
    fn test_malformed_input_error_handling() {
        let canonicalizer = Canonicalizer::new();

        let malformed_yaml_cases = [
            "invalid: yaml: content: [unclosed",
            "key: 'unclosed string",
            "- item\n- [unclosed array",
            "key: {unclosed: object",
            "---\n...\n---\ninvalid multiple docs",
        ];

        for (i, malformed_yaml) in malformed_yaml_cases.iter().enumerate() {
            let result = canonicalizer.hash_canonicalized(malformed_yaml, FileType::Yaml);
            assert!(
                result.is_err(),
                "Malformed YAML case {i} should return error: {malformed_yaml}"
            );

            let error_msg = result.unwrap_err().to_string();
            assert!(
                error_msg.contains("Failed to parse YAML") || error_msg.contains("YAML"),
                "Error message should mention YAML parsing: {error_msg}"
            );
        }

        for (i, malformed_yaml) in malformed_yaml_cases.iter().enumerate() {
            let result = canonicalizer.canonicalize_yaml(malformed_yaml);
            assert!(
                result.is_err(),
                "canonicalize_yaml case {i} should return error: {malformed_yaml}"
            );
        }

        let markdown_inputs = vec![
            "# Valid markdown",
            "Invalid markdown without proper structure",
            "```\ncode without language\n```",
            "~~~\ncode with tildes\n~~~",
        ];

        for markdown_input in markdown_inputs {
            let result = canonicalizer.normalize_markdown(markdown_input);
            assert!(
                result.is_ok(),
                "Markdown normalization should be forgiving: {markdown_input}"
            );
        }
    }

    // `hash_canonicalized_with_context` wraps failures in
    // `CanonicalizationFailed`, carrying the phase and the error text.
    #[test]
    fn test_canonicalization_with_context_error_handling() {
        let canonicalizer = Canonicalizer::new();

        let malformed_yaml = "invalid: yaml: [unclosed";
        let result = canonicalizer.hash_canonicalized_with_context(
            malformed_yaml,
            FileType::Yaml,
            "TEST_PHASE",
        );

        assert!(result.is_err());

        match result.unwrap_err() {
            XCheckerError::CanonicalizationFailed { phase, reason } => {
                assert_eq!(phase, "TEST_PHASE");
                assert!(reason.contains("Failed to parse YAML"));
            }
            other => panic!("Expected CanonicalizationFailed, got: {other:?}"),
        }
    }

    // Empty input: YAML canonicalizes to `null\n`, Markdown to `\n`, text
    // stays empty, and hashing still yields a 64-character hex digest.
    #[test]
    fn test_canonicalization_with_empty_content() {
        let canonicalizer = Canonicalizer::new();

        let empty_yaml = "";
        let result = canonicalizer.canonicalize_yaml(empty_yaml);
        assert!(result.is_ok());
        let canonicalized = result.unwrap();
        assert_eq!(canonicalized, "null\n");

        let empty_md = "";
        let result = canonicalizer.normalize_markdown(empty_md);
        assert!(result.is_ok());
        let normalized = result.unwrap();
        assert_eq!(normalized, "\n");

        let empty_text = "";
        let normalized_text = canonicalizer.normalize_text(empty_text);
        assert_eq!(normalized_text, "");

        let hash_result = canonicalizer.hash_canonicalized(empty_text, FileType::Text);
        assert!(hash_result.is_ok());
        let hash = hash_result.unwrap();
        assert!(!hash.is_empty());
        assert_eq!(hash.len(), 64);
    }

    // Punctuation, escapes and non-ASCII text pass through YAML
    // canonicalization and Markdown normalization; hashing is repeatable.
    #[test]
    fn test_canonicalization_with_special_characters() {
        let canonicalizer = Canonicalizer::new();

        let yaml_with_special = r#"
name: "test-with-special-chars: @#$%^&*()"
description: "Line with\ttabs and\nnewlines"
unicode: "Hello 世界 🌍"
quotes: 'single "quotes" inside'
"#;

        let result = canonicalizer.canonicalize_yaml(yaml_with_special);
        assert!(result.is_ok());
        let canonicalized = result.unwrap();
        assert!(canonicalized.ends_with('\n'));
        assert!(!canonicalized.contains('\r'));

        let md_with_special = r"# Title with @#$%

Content with **bold** and *italic* and `code`.

- List item with special: <>[]{}
- Unicode: 你好 مرحبا Здравствуйте

```rust
fn test() { /* comment */ }
```
";

        let result = canonicalizer.normalize_markdown(md_with_special);
        assert!(result.is_ok());
        let normalized = result.unwrap();
        assert!(normalized.ends_with('\n'));
        assert!(!normalized.contains('\r'));
        assert!(normalized.contains("你好"));
        assert!(normalized.contains("مرحبا"));

        let hash1 = canonicalizer
            .hash_canonicalized(md_with_special, FileType::Markdown)
            .unwrap();
        let hash2 = canonicalizer
            .hash_canonicalized(md_with_special, FileType::Markdown)
            .unwrap();
        assert_eq!(hash1, hash2);
    }

    // Unicode content is preserved verbatim; identical input hashes
    // identically and different Unicode content hashes differently.
    #[test]
    fn test_canonicalization_with_unicode() {
        let canonicalizer = Canonicalizer::new();

        let yaml_unicode = r#"
chinese: "中文测试"
arabic: "اختبار عربي"
russian: "Русский тест"
emoji: "🚀 🌟 ✨"
mixed: "Hello 世界 🌍"
"#;

        let result = canonicalizer.canonicalize_yaml(yaml_unicode);
        assert!(result.is_ok());
        let canonicalized = result.unwrap();
        assert!(canonicalized.contains("中文测试"));
        assert!(canonicalized.contains("اختبار عربي"));
        assert!(canonicalized.contains("Русский тест"));
        assert!(canonicalized.contains("🚀"));

        let md_unicode = r"# Unicode Test 测试

## Section with العربية

Content with Русский and 日本語.

- 中文
- العربية
- Русский
- 日本語

Emoji: 🎉 🎊 🎈
";

        let result = canonicalizer.normalize_markdown(md_unicode);
        assert!(result.is_ok());
        let normalized = result.unwrap();
        assert!(normalized.contains("测试"));
        assert!(normalized.contains("العربية"));
        assert!(normalized.contains("Русский"));
        assert!(normalized.contains("日本語"));
        assert!(normalized.contains("🎉"));

        let hash1 = canonicalizer
            .hash_canonicalized(yaml_unicode, FileType::Yaml)
            .unwrap();
        let hash2 = canonicalizer
            .hash_canonicalized(yaml_unicode, FileType::Yaml)
            .unwrap();
        assert_eq!(hash1, hash2);

        let yaml_unicode_2 = r#"
chinese: "不同的中文"
arabic: "مختلف عربي"
"#;
        let hash3 = canonicalizer
            .hash_canonicalized(yaml_unicode_2, FileType::Yaml)
            .unwrap();
        assert_ne!(hash1, hash3);
    }

    // Whitespace-only input: spaces-only YAML is `null`, a tab-only YAML line
    // is a parse error, Markdown collapses to `\n`, text is left untouched.
    #[test]
    fn test_canonicalization_with_whitespace_only() {
        let canonicalizer = Canonicalizer::new();

        let whitespace_yaml = " \n \n ";
        let result = canonicalizer.canonicalize_yaml(whitespace_yaml);
        assert!(result.is_ok());
        let canonicalized = result.unwrap();
        assert_eq!(canonicalized, "null\n");

        let yaml_with_tabs = " \n\t\n ";
        let result = canonicalizer.canonicalize_yaml(yaml_with_tabs);
        assert!(
            result.is_err(),
            "YAML with tabs at start of line should fail to parse"
        );

        let whitespace_md = " \n\t\n ";
        let result = canonicalizer.normalize_markdown(whitespace_md);
        assert!(result.is_ok());
        let normalized = result.unwrap();
        assert_eq!(normalized, "\n");

        let whitespace_text = " \n\t\n ";
        let normalized_text = canonicalizer.normalize_text(whitespace_text);
        assert_eq!(normalized_text, " \n\t\n ");
    }

    // Very long scalar values / lines are handled without error or truncation.
    #[test]
    fn test_canonicalization_with_very_long_lines() {
        let canonicalizer = Canonicalizer::new();

        let long_value = "a".repeat(10000);
        let yaml_long = format!("key: \"{long_value}\"");
        let result = canonicalizer.canonicalize_yaml(&yaml_long);
        assert!(result.is_ok());

        let md_long = format!("# Title\n\n{}\n", "x".repeat(10000));
        let result = canonicalizer.normalize_markdown(&md_long);
        assert!(result.is_ok());
        let normalized = result.unwrap();
        assert!(normalized.contains(&"x".repeat(10000)));
    }

    // Inputs mixing `\r\n`, `\n` and lone `\r` are normalized to `\n` only.
    #[test]
    fn test_canonicalization_with_mixed_line_endings() {
        let canonicalizer = Canonicalizer::new();

        let yaml_mixed = "key1: value1\r\nkey2: value2\nkey3: value3\r";
        let result = canonicalizer.canonicalize_yaml(yaml_mixed);
        assert!(result.is_ok());
        let canonicalized = result.unwrap();
        assert!(!canonicalized.contains('\r'));
        assert!(canonicalized.contains("key1"));
        assert!(canonicalized.contains("key2"));
        assert!(canonicalized.contains("key3"));

        let md_mixed = "# Title\r\n\r\nContent\nMore content\r";
        let result = canonicalizer.normalize_markdown(md_mixed);
        assert!(result.is_ok());
        let normalized = result.unwrap();
        assert!(!normalized.contains('\r'));
        assert_eq!(normalized, "# Title\n\nContent\nMore content\n");
    }

    // Empty-input behavior again, with a looser YAML assertion.
    // NOTE(review): overlaps with `test_canonicalization_with_empty_content`.
    #[test]
    fn test_canonicalization_empty_content() {
        let canonicalizer = Canonicalizer::new();

        let empty_yaml = "";
        let result = canonicalizer.canonicalize_yaml(empty_yaml);
        assert!(result.is_ok());
        let canonicalized = result.unwrap();
        assert!(canonicalized.contains("null") || canonicalized == "\n");

        let empty_md = "";
        let result = canonicalizer.normalize_markdown(empty_md);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), "\n");

        let empty_text = "";
        let normalized = canonicalizer.normalize_text(empty_text);
        assert_eq!(normalized, "");
    }

    // Escaped quotes, backslashes and punctuation survive canonicalization.
    // NOTE(review): overlaps with `test_canonicalization_with_special_characters`.
    #[test]
    fn test_canonicalization_special_characters() {
        let canonicalizer = Canonicalizer::new();

        let yaml_with_special = r#"
name: "test@#$%^&*()"
value: "quotes\"and'apostrophes"
path: "C:\\Windows\\System32"
"#;
        let result = canonicalizer.canonicalize_yaml(yaml_with_special);
        assert!(result.is_ok());
        let canonicalized = result.unwrap();
        assert!(canonicalized.contains("test@#$%^&*()"));
        assert!(!canonicalized.contains('\r'));

        let md_with_special = "# Title with @#$%\n\nContent with <>&\"'\n";
        let result = canonicalizer.normalize_markdown(md_with_special);
        assert!(result.is_ok());
        let normalized = result.unwrap();
        assert!(normalized.contains("@#$%"));
        assert!(normalized.contains("<>&\"'"));
    }

    // Unicode survives YAML/Markdown processing, and Unicode text hashes
    // deterministically to a 64-character digest.
    // NOTE(review): overlaps with `test_canonicalization_with_unicode`.
    #[test]
    fn test_canonicalization_unicode() {
        let canonicalizer = Canonicalizer::new();

        let yaml_with_unicode = r#"
name: "Hello 世界 🌍"
emoji: "🚀 ✨ 🎉"
chinese: "中文测试"
arabic: "مرحبا"
"#;
        let result = canonicalizer.canonicalize_yaml(yaml_with_unicode);
        assert!(result.is_ok());
        let canonicalized = result.unwrap();
        assert!(canonicalized.contains("世界"));
        assert!(canonicalized.contains("🌍"));
        assert!(canonicalized.contains("🚀"));
        assert!(canonicalized.contains("中文测试"));
        assert!(canonicalized.contains("مرحبا"));

        let md_with_unicode = "# 标题 Title\n\nContent with émojis: 😀 🎨 ✅\n\nРусский текст\n";
        let result = canonicalizer.normalize_markdown(md_with_unicode);
        assert!(result.is_ok());
        let normalized = result.unwrap();
        assert!(normalized.contains("标题"));
        assert!(normalized.contains("😀"));
        assert!(normalized.contains("Русский"));

        let unicode_text = "Hello 世界 🌍";
        let hash1 = canonicalizer
            .hash_canonicalized(unicode_text, FileType::Text)
            .unwrap();
        let hash2 = canonicalizer
            .hash_canonicalized(unicode_text, FileType::Text)
            .unwrap();
        assert_eq!(hash1, hash2);
        assert_eq!(hash1.len(), 64);
    }

    // Trailing spaces/tabs are removed from canonical YAML and normalized
    // Markdown, and Markdown never ends with more than one newline.
    #[test]
    fn test_canonicalization_whitespace_edge_cases() {
        let canonicalizer = Canonicalizer::new();

        let yaml_with_whitespace = "name: test \nvalue: \t data \t\n";
        let result = canonicalizer.canonicalize_yaml(yaml_with_whitespace);
        assert!(result.is_ok());
        let canonicalized = result.unwrap();
        assert!(!canonicalized.contains(" \n"));
        assert!(!canonicalized.contains('\t'));

        let md_with_trailing = "# Title \n\nParagraph with trailing spaces \n\n\n\n";
        let result = canonicalizer.normalize_markdown(md_with_trailing);
        assert!(result.is_ok());
        let normalized = result.unwrap();
        assert!(!normalized.contains(" \n"));
        assert!(!normalized.ends_with("\n\n\n"));
        assert!(normalized.ends_with('\n'));
    }

    // Hashing empty content works and is repeatable for text and YAML.
    #[test]
    fn test_hash_with_empty_content() {
        let canonicalizer = Canonicalizer::new();

        let hash1 = canonicalizer
            .hash_canonicalized("", FileType::Text)
            .unwrap();
        let hash2 = canonicalizer
            .hash_canonicalized("", FileType::Text)
            .unwrap();
        assert_eq!(hash1, hash2);
        assert_eq!(hash1.len(), 64);

        let hash3 = canonicalizer
            .hash_canonicalized("", FileType::Yaml)
            .unwrap();
        let hash4 = canonicalizer
            .hash_canonicalized("", FileType::Yaml)
            .unwrap();
        assert_eq!(hash3, hash4);
    }

    // Structurally invalid YAML makes `hash_canonicalized` return an error.
    #[test]
    fn test_invalid_yaml_handling() {
        let canonicalizer = Canonicalizer::new();

        let truly_invalid = "{ unclosed bracket";
        let hash_result = canonicalizer.hash_canonicalized(truly_invalid, FileType::Yaml);
        assert!(hash_result.is_err());

        let malformed = "---\n[invalid";
        let result2 = canonicalizer.hash_canonicalized(malformed, FileType::Yaml);
        assert!(result2.is_err());
    }
}