grit_lib/
unicode_normalization.rs1use icu_normalizer::ComposingNormalizerBorrowed;
7use std::borrow::Cow;
8use std::ffi::OsString;
9use std::fs::OpenOptions;
10use std::io::Write;
11use std::path::{Path, PathBuf};
12
/// Reports whether `s` contains at least one byte outside the 7-bit ASCII range.
///
/// Returns `false` for the empty string and for text made up solely of ASCII bytes.
#[must_use]
pub fn has_non_ascii_utf8(s: &str) -> bool {
    // A string is "non-ASCII" exactly when some byte has its high bit set.
    !s.is_ascii()
}
18
19#[must_use]
21pub fn precompose_utf8_segment(s: &str) -> Cow<'_, str> {
22 if !has_non_ascii_utf8(s) {
23 return Cow::Borrowed(s);
24 }
25 let normalized = ComposingNormalizerBorrowed::new_nfc().normalize(s);
26 if normalized == s {
27 Cow::Borrowed(s)
28 } else {
29 Cow::Owned(normalized.into_owned())
30 }
31}
32
33#[must_use]
35pub fn precompose_utf8_path(path: &str) -> Cow<'_, str> {
36 if !path.as_bytes().iter().any(|b| *b & 0x80 != 0) {
37 return Cow::Borrowed(path);
38 }
39 let mut buf = String::with_capacity(path.len());
40 for (i, seg) in path.split('/').enumerate() {
41 if i > 0 {
42 buf.push('/');
43 }
44 let c = precompose_utf8_segment(seg);
45 buf.push_str(c.as_ref());
46 }
47 if buf == path {
48 Cow::Borrowed(path)
49 } else {
50 Cow::Owned(buf)
51 }
52}
53
54pub fn precompose_os_string_utf8_path(s: &mut OsString, enabled: bool) {
56 if !enabled {
57 return;
58 }
59 let Some(utf8) = s.to_str() else {
60 return;
61 };
62 let normalized = precompose_utf8_path(utf8).into_owned();
63 if normalized != utf8 {
64 *s = OsString::from(normalized);
65 }
66}
67
/// Probes whether a file created under a precomposed name in `git_dir` is also
/// reachable through the decomposed spelling of that name.
///
/// A one-byte file named U+00E4 is created inside `git_dir`, then the path spelled
/// as `a` followed by U+0308 is checked for existence. The probe file is removed
/// again before returning.
///
/// # Errors
///
/// Returns the underlying I/O error when the probe file cannot be created or written.
/// The probe file is removed even when the write fails.
pub fn probe_filesystem_normalizes_nfd_to_nfc(git_dir: &Path) -> std::io::Result<bool> {
    const NFC: &str = "\u{00e4}";
    const NFD: &str = "\u{0061}\u{0308}";

    let nfc_path: PathBuf = git_dir.join(NFC);
    // Best effort: clear a stale probe file so `create_new` below can succeed.
    let _ = std::fs::remove_file(&nfc_path);

    let mut probe = OpenOptions::new()
        .create_new(true)
        .write(true)
        .open(&nfc_path)?;
    let written = probe.write_all(b"x");
    // Close the handle before looking the file up under its other spelling.
    drop(probe);

    let aliases = written.map(|()| git_dir.join(NFD).exists());

    // Clean up on both the success and the write-failure path so no probe file is
    // left behind in `git_dir`.
    let _ = std::fs::remove_file(&nfc_path);
    aliases
}
89
#[cfg(test)]
mod tests {
    use super::*;

    /// A decomposed `A` + U+0308 in a file name must come back as the single
    /// precomposed code point U+00C4.
    #[test]
    fn precompose_nfd_filename_to_nfc() {
        let decomposed = "f.A\u{0308}";
        let expected = "f.\u{00c4}";

        let actual = precompose_utf8_path(decomposed);

        assert_eq!(actual.as_ref(), expected);
    }
}