Skip to main content

nzb_dispatch/
util.rs

1use unicode_normalization::UnicodeNormalization;
2
3/// Normalize a string to Unicode NFC form.
4///
5/// Used for filenames from sources not covered by nzb-core's parser
6/// (e.g. yEnc headers from nzb-decode). NZB-derived names are already
7/// normalized by nzb-core v0.1.1+.
8pub fn normalize_nfc(s: &str) -> String {
9    s.nfc().collect()
10}
11
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn nfc_normalization_composes_decomposed() {
        // NFD spelling of "café": plain 'e' followed by the combining acute accent (U+0301).
        let decomposed = "caf\u{0065}\u{0301}";
        // NFC spelling of "café": the single precomposed 'é' (U+00E9).
        let composed = "caf\u{00E9}";

        let actual = normalize_nfc(decomposed);
        assert_eq!(actual, composed);
    }

    #[test]
    fn nfc_normalization_preserves_ascii() {
        // Pure ASCII input must come back unchanged.
        let input = "My.Show.S01E01.720p.mkv";

        let actual = normalize_nfc(input);
        assert_eq!(actual, input);
    }

    #[test]
    fn nfc_normalization_preserves_already_nfc() {
        // Input that is already in NFC (contains U+00DF) must come back unchanged.
        let input = "Stra\u{00DF}e.nzb";

        let actual = normalize_nfc(input);
        assert_eq!(actual, input);
    }

    #[test]
    fn nfc_normalization_handles_hangul() {
        // Decomposed Hangul jamo: ᄀ (U+1100) + ᅡ (U+1161) compose to 가 (U+AC00).
        let jamo = "\u{1100}\u{1161}";
        let syllable = "\u{AC00}";

        let actual = normalize_nfc(jamo);
        assert_eq!(actual, syllable);
    }
}