Skip to main content

grit_lib/
unicode_normalization.rs

1//! UTF-8 NFC path normalization for macOS-style filesystems (`core.precomposeUnicode`).
2//!
3//! When the filesystem treats NFD and NFC spellings as the same path, Git stores paths in
4//! precomposed (NFC) form. This module implements the same normalization using ICU.
5
6use icu_normalizer::ComposingNormalizerBorrowed;
7use std::borrow::Cow;
8use std::ffi::OsString;
9use std::fs::OpenOptions;
10use std::io::Write;
11use std::path::{Path, PathBuf};
12
/// Report whether `s` holds at least one byte outside the 7-bit ASCII range.
///
/// Returns `false` for the empty string and for pure-ASCII input.
#[must_use]
pub fn has_non_ascii_utf8(s: &str) -> bool {
    // A string is "all ASCII" exactly when no byte has its high bit set.
    !s.is_ascii()
}
18
19/// Normalize a single path segment (no `/`) to NFC when it contains non-ASCII UTF-8.
20#[must_use]
21pub fn precompose_utf8_segment(s: &str) -> Cow<'_, str> {
22    if !has_non_ascii_utf8(s) {
23        return Cow::Borrowed(s);
24    }
25    let normalized = ComposingNormalizerBorrowed::new_nfc().normalize(s);
26    if normalized == s {
27        Cow::Borrowed(s)
28    } else {
29        Cow::Owned(normalized.into_owned())
30    }
31}
32
33/// Normalize every `/`-separated segment of `path` to NFC.
34#[must_use]
35pub fn precompose_utf8_path(path: &str) -> Cow<'_, str> {
36    if !path.as_bytes().iter().any(|b| *b & 0x80 != 0) {
37        return Cow::Borrowed(path);
38    }
39    let mut buf = String::with_capacity(path.len());
40    for (i, seg) in path.split('/').enumerate() {
41        if i > 0 {
42            buf.push('/');
43        }
44        let c = precompose_utf8_segment(seg);
45        buf.push_str(c.as_ref());
46    }
47    if buf == path {
48        Cow::Borrowed(path)
49    } else {
50        Cow::Owned(buf)
51    }
52}
53
54/// Update `s` in place when it is valid UTF-8 and NFC differs from the current spelling.
55pub fn precompose_os_string_utf8_path(s: &mut OsString, enabled: bool) {
56    if !enabled {
57        return;
58    }
59    let Some(utf8) = s.to_str() else {
60        return;
61    };
62    let normalized = precompose_utf8_path(utf8).into_owned();
63    if normalized != utf8 {
64        *s = OsString::from(normalized);
65    }
66}
67
/// Probe whether creating a file under `git_dir` with an NFC filename makes the NFD spelling
/// visible as the same path (macOS / HFS+ style).
///
/// Matches Git's `probe_utf8_pathname_composition` / `UTF8_NFD_TO_NFC` test prerequisite.
///
/// Returns `Ok(true)` when the NFD spelling resolves to the freshly created
/// NFC file, `Ok(false)` otherwise.
///
/// # Errors
///
/// Returns the underlying I/O error when the probe file cannot be created in
/// `git_dir` or cannot be written. The probe file is removed again (best
/// effort) on the write-failure path as well as on success.
pub fn probe_filesystem_normalizes_nfd_to_nfc(git_dir: &Path) -> std::io::Result<bool> {
    const NFC: &str = "\u{00e4}";
    const NFD: &str = "\u{0061}\u{0308}";
    let nfc_path: PathBuf = git_dir.join(NFC);

    // Best effort: clear a stale probe file so `create_new` can succeed.
    let _ = std::fs::remove_file(&nfc_path);

    let mut probe = OpenOptions::new()
        .create_new(true)
        .write(true)
        .open(&nfc_path)?;
    let written = probe.write_all(b"x");
    // Close the handle before checking for the alias or unlinking the file.
    drop(probe);
    if let Err(err) = written {
        // Do not leave the probe file behind when the write fails.
        let _ = std::fs::remove_file(&nfc_path);
        return Err(err);
    }

    let aliases = git_dir.join(NFD).exists();
    let _ = std::fs::remove_file(&nfc_path);
    Ok(aliases)
}
89
#[cfg(test)]
mod tests {
    use super::*;

    /// A decomposed filename (`A` followed by a combining diaeresis) is
    /// rewritten to its precomposed form.
    #[test]
    fn precompose_nfd_filename_to_nfc() {
        // Mirrors t3910: Adiarnfc is UTF-8 \303\204 (U+00C4), Adiarnfd is A + U+0308.
        let decomposed = String::from("f.A\u{0308}");
        let composed = "f.\u{00c4}";
        let actual = precompose_utf8_path(&decomposed);
        assert_eq!(actual.as_ref(), composed);
    }
}