1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
/// Returns the canonical form of the URL path `p`, eliminating `.` and `..`
/// elements.
///
/// The following rules are applied iteratively until no further processing can
/// be done:
///
/// 1. Replace multiple slashes with a single slash.
/// 2. Eliminate each `.` path name element (the current directory).
/// 3. Eliminate each inner `..` path name element (the parent directory)
///    along with the non-`..` element that precedes it.
/// 4. Eliminate `..` elements that begin a rooted path:
///    that is, replace `/..` by `/` at the beginning of a path.
///
/// The result always starts with `/`: a path without a leading slash gets one,
/// and if the result of this process is an empty string, `"/"` is returned.
/// A trailing slash is kept (a final `.` element also produces one) unless the
/// result is the root itself.
///
/// Elements that merely *start* with a dot, such as `.hidden` or `...`, are
/// ordinary names and are copied unchanged.
pub fn clean_path(p: &str) -> String {
    // Turn empty string into "/".
    if p.is_empty() {
        return "/".to_string();
    }

    let n = p.len();
    // Output buffer. It stays empty for as long as the output is still an
    // exact prefix of the input; `buf_app` fills it on the first divergence.
    let mut buf: Vec<u8> = Vec::new();

    // Invariants:
    //      reading from path; r is index of next byte to process.
    //      writing to buf; w is index of next byte to write.

    // The output must start with '/'. For a rooted input that byte is shared
    // with the input, so both cursors start right after it.
    let mut r = 1;
    let mut w = 1;

    if !p.starts_with('/') {
        // A slash has to be inserted, so the output can never be a prefix of
        // the input: allocate up front (one byte longer than the input).
        r = 0;
        buf.resize(n + 1, 0);
        buf[0] = b'/';
    }

    let mut trailing = n > 1 && p.ends_with('/');
    let p = p.as_bytes();

    while r < n {
        match p[r] {
            // Empty path element; a trailing slash is re-added after the loop.
            b'/' => r += 1,
            // "." as the very last element: drop it but keep a trailing slash.
            b'.' if r + 1 == n => {
                trailing = true;
                r += 1;
            }
            // "." element followed by a slash: skip both bytes.
            b'.' if p[r + 1] == b'/' => r += 2,
            // ".." element: remove back to the previous '/'.
            b'.' if p[r + 1] == b'.' && (r + 2 == n || p[r + 2] == b'/') => {
                r += 3;

                if w > 1 {
                    // Can backtrack. While `buf` is empty the output written
                    // so far is exactly `p[..w]`, so scan the input instead.
                    w -= 1;
                    let written: &[u8] = if buf.is_empty() { p } else { buf.as_slice() };
                    while w > 1 && written[w] != b'/' {
                        w -= 1;
                    }
                }
            }
            // Real path element. This arm also takes names that only begin
            // with a dot (".hidden", "...", "..b"); they match none of the
            // guards above and must still be consumed so that `r` advances.
            _ => {
                // Add a separating slash unless we are directly after the root.
                if w > 1 {
                    buf_app(&mut buf, p, w, b'/');
                    w += 1;
                }

                // Copy the element up to the next slash or the end of input.
                while r < n && p[r] != b'/' {
                    buf_app(&mut buf, p, w, p[r]);
                    w += 1;
                    r += 1;
                }
            }
        }
    }

    // Re-append trailing slash.
    if trailing && w > 1 {
        buf_app(&mut buf, p, w, b'/');
        w += 1;
    }

    let cleaned = if buf.is_empty() {
        // No divergence happened: the result is a prefix of the input.
        p[..w].to_vec()
    } else {
        buf.truncate(w);
        buf
    };
    String::from_utf8(cleaned)
        .expect("whole elements of a valid UTF-8 path are copied and only split at ASCII bytes")
}

/// Internal helper that writes byte `c` at output position `w`, lazily
/// creating the buffer if necessary.
///
/// While `buf` is empty the output is assumed to equal `s[..w]`. If `c` matches
/// `s[w]` nothing needs to be stored and the call is a no-op. Otherwise `buf`
/// is sized to `s.len()`, seeded with `s[..w]`, and `c` is stored at index `w`.
/// Once `buf` is non-empty every call simply stores `c` at `buf[w]`.
fn buf_app(buf: &mut Vec<u8>, s: &[u8], w: usize, c: u8) {
    if buf.is_empty() {
        if s[w] == c {
            // Output still mirrors the input; no buffer needed yet.
            return;
        }
        // First divergence: materialize the prefix written so far.
        buf.resize(s.len(), 0);
        buf[..w].copy_from_slice(&s[..w]);
    }
    buf[w] = c;
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Table of `(input path, expected cleaned path)` pairs.
    const CLEAN_TESTS: &[(&str, &str)] = &[
        // Already clean
        ("/", "/"),
        ("/abc", "/abc"),
        ("/a/b/c", "/a/b/c"),
        ("/abc/", "/abc/"),
        ("/a/b/c/", "/a/b/c/"),
        // Missing root
        ("", "/"),
        ("a/", "/a/"),
        ("abc", "/abc"),
        ("abc/def", "/abc/def"),
        ("a/b/c", "/a/b/c"),
        // Remove doubled slash
        ("//", "/"),
        ("/abc//", "/abc/"),
        ("/abc/def//", "/abc/def/"),
        ("/a/b/c//", "/a/b/c/"),
        ("/abc//def//ghi", "/abc/def/ghi"),
        ("//abc", "/abc"),
        ("///abc", "/abc"),
        ("//abc//", "/abc/"),
        // Remove . elements
        (".", "/"),
        ("./", "/"),
        ("/abc/./def", "/abc/def"),
        ("/./abc/def", "/abc/def"),
        ("/abc/.", "/abc/"),
        // Remove .. elements
        ("..", "/"),
        ("../", "/"),
        ("../../", "/"),
        ("../..", "/"),
        ("../../abc", "/abc"),
        ("/abc/def/ghi/../jkl", "/abc/def/jkl"),
        ("/abc/def/../ghi/../jkl", "/abc/jkl"),
        ("/abc/def/..", "/abc"),
        ("/abc/def/../..", "/"),
        ("/abc/def/../../..", "/"),
        ("/abc/def/../../..", "/"),
        ("/abc/def/../../../ghi/jkl/../../../mno", "/mno"),
        // Combinations
        ("abc/./../def", "/def"),
        ("abc//./../def", "/def"),
        ("abc/../../././../def", "/def"),
    ];

    /// Every input must clean to its expected form, and cleaning an already
    /// cleaned path must leave it unchanged.
    #[test]
    fn test_path_clean() {
        for &(input, expected) in CLEAN_TESTS {
            let cleaned = clean_path(input);
            assert_eq!(expected, cleaned);

            let cleaned_again = clean_path(expected);
            assert_eq!(expected, cleaned_again);
        }
    }

    // TODO: add an allocation-count test (`test_path_clean_mallocs`).
}