sanitize_git_ref/
lib.rs

1mod error;
2
3use crate::error::SanitizeGitRefError;
4
/// Switches that control how strictly a git ref is sanitized.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct SanitizeOptions {
    /// When `true`, the rule that a ref must contain at least one `/` is waived
    /// (the equivalent of `git check-ref-format --allow-onelevel`).
    allow_onelevel: bool,
}
8
9/// Rules obtained from [git-check-ref-format].
10///
11/// This function sanitizes git refs with the assumption that `--allow-onelevel` is true.
12///
13/// [git-check-ref-format]: https://git-scm.com/docs/git-check-ref-format
14pub fn sanitize_git_ref_onelevel(text: &str) -> String {
15    let sanitized = sanitize(
16        text,
17        SanitizeOptions {
18            allow_onelevel: true,
19        },
20    );
21    sanitized.expect("Sanitization should always suceed when allow_onelevel is true")
22}
23
/// Replaces every repeated `target` in a run with a hyphen.
///
/// The first `target` of each run is kept; every `target` that immediately
/// follows another `target` becomes `-`. For example, with `target == '/'`,
/// `"a///b"` becomes `"a/--b"`.
fn sanitize_consecutive_run(string: String, target: char) -> String {
    let mut previous_was_target = false;
    let mut output = String::with_capacity(string.len());

    for ch in string.chars() {
        let is_target = ch == target;
        if is_target && previous_was_target {
            output.push('-');
        } else {
            output.push(ch);
        }
        previous_was_target = is_target;
    }

    output
}
41
/// Collapses each run of `target` down to a single occurrence.
///
/// Any `target` that immediately follows another `target` is dropped; all other
/// characters are kept in their original order.
fn elide_consecutive_run(mut string: String, target: char) -> String {
    let mut previous_was_target = false;

    string.retain(|ch| {
        let is_target = ch == target;
        let is_repeat = is_target && previous_was_target;
        previous_was_target = is_target;
        !is_repeat
    });

    string
}
54
55fn sanitize(text: &str, options: SanitizeOptions) -> Result<String, Box<SanitizeGitRefError>> {
56    let SanitizeOptions { allow_onelevel } = options;
57    let mut result = text.to_owned();
58
59    // They must contain at least one /. This enforces the presence of a
60    // category like heads/, tags/ etc. but the actual names are not restricted.
61    // If the --allow-onelevel option is used, this rule is waived.
62    if !allow_onelevel {
63        if !result.contains('/') {
64            return Err(Box::new(SanitizeGitRefError::DoesNotContainForwardSlash));
65        }
66    }
67
68    // They can include slash / for hierarchical (directory) grouping, but
69    // no slash-separated component can begin with a dot . or end with the
70    // sequence .lock.
71    if result.starts_with('.') {
72        result = result.replacen('.', "-", 1);
73    }
74    result = result.replace("/.", "/-");
75    // FIXME: this is overly cautious
76    result = result.replace(".lock", "-");
77
78    // They cannot contain a sequence @{.
79    result = result.replace("@{", "-");
80
81    result = result
82        .chars()
83        .map(|c| -> char {
84            // They cannot have ASCII control characters (i.e. bytes whose
85            // values are lower than \040, or \177 DEL).
86            if c.is_ascii_control() {
87                return '-';
88            }
89
90            // They cannot have space anywhere.
91            if c.is_whitespace() {
92                return '-';
93            }
94
95            match c {
96                // They cannot have tilde ~ anywhere.
97                '~'
98                // They cannot have caret ^ anywhere.
99                | '^'
100
101                // They cannot have colon : anywhere.
102                | ':'
103
104                // They cannot have question-mark ?, asterisk *, or open bracket
105                // [ anywhere. See the --refspec-pattern option below for an
106                // exception to this rule.
107                | '?'
108                | '*'
109                | '['
110
111                // They cannot contain a \.
112                | '\\'
113
114                // They cannot be the single character @.
115                | '@'
116
117                => '-',
118
119                _ => c,
120            }
121        })
122        .collect();
123
124    // They cannot contain multiple consecutive slashes (see the --normalize option below for an exception to this rule)
125    result = sanitize_consecutive_run(result, '/');
126
127    // They cannot have two consecutive dots .. anywhere.
128    result = sanitize_consecutive_run(result, '.');
129
130    // They cannot begin with a slash / (see the --normalize option below for an exception to this rule)
131    while result.starts_with('/') {
132        result = result.replacen('/', "-", 1);
133    }
134
135    // They cannot end with a dot .
136    // They cannot end with a slash / (see the --normalize option below for an exception to this rule)
137    while result.ends_with('/') || result.ends_with('.') {
138        result.pop();
139    }
140
141    // Convert any sequence of multiple hyphens into a single hyphen.
142    // We convert invalid characters into hyphens to prevent shrinking the input into an empty string.
143    result = elide_consecutive_run(result, '-');
144
145    Ok(result)
146}
147
#[cfg(test)]
mod test {
    use crate::sanitize_git_ref_onelevel;

    use proptest::prelude::*;

    /// Generates a unit test and a property test for one branch naming rule.
    ///
    /// * `$unit_test` / `$property_test`: names of the generated test functions.
    /// * `$test_of_inclusion`: predicate that returns `true` when a branch name
    ///   VIOLATES the rule; the generated tests assert that it returns `false`
    ///   for sanitized output.
    /// * `$unsanitized_branch_name`: input for the unit test. The property test
    ///   uses arbitrary strings instead.
    macro_rules! test_does_not_violate_branch_naming_rule {
        ($unit_test:ident, $property_test:ident, $test_of_inclusion:expr, $unsanitized_branch_name:expr) => {
            #[test]
            fn $unit_test() {
                let sanitized_branch_name = sanitize_git_ref_onelevel(&$unsanitized_branch_name);
                assert!(
                    !$test_of_inclusion(&sanitized_branch_name),
                    "Expected unsanitized string {:?} to sanitize to a valid branch name, but {:?} is not a valid branch name",
                    &$unsanitized_branch_name,
                    &sanitized_branch_name
                );
            }

            proptest! {
                #[test]
                fn $property_test(unsanitized_branch_name in any::<String>()) {
                    let sanitized_branch_name = sanitize_git_ref_onelevel(&unsanitized_branch_name);
                    assert!(
                        !$test_of_inclusion(&sanitized_branch_name),
                        "Expected unsanitized string {:?} to sanitize to a valid branch name, but {:?} is not a valid branch name",
                        &unsanitized_branch_name,
                        &sanitized_branch_name
                    );
                }
            }
        };
    }

    // They can include slash / for hierarchical (directory) grouping, but no slash-separated component can begin with a dot.
    test_does_not_violate_branch_naming_rule!(
        branch_name_does_not_contain_a_slash_separated_component_beginning_with_a_dot,
        proptest_branch_name_does_not_contain_a_slash_separated_component_beginning_with_a_dot,
        |branch_name: &str| -> bool {
            branch_name
                .split('/')
                .any(|component| component.starts_with('.'))
        },
        "refs/heads/.master"
    );

    // Branch names can include slash / for hierarchical (directory) grouping, but no slash-separated component can end with the sequence .lock.
    test_does_not_violate_branch_naming_rule!(
        branch_name_does_not_contain_a_slash_separated_component_ending_with_dot_lock,
        proptest_branch_name_does_not_contain_a_slash_separated_component_ending_with_dot_lock,
        |branch_name: &str| -> bool {
            branch_name
                .split('/')
                .any(|component| component.ends_with(".lock"))
        },
        "refs/heads/master.lock"
    );

    // They must contain at least one /. This enforces the presence of a category like heads/, tags/ etc. but the actual names are not restricted.
    // If the --allow-onelevel option is used, this rule is waived.
    // FIXME: Turn on this test when we implement sanitize_git_ref (sans allow-onelevel)
    // fn has_at_least_one_slash<S: AsRef<str>>(branch_name: S) -> bool {
    //     branch_name.as_ref().contains("/")
    // }

    // #[test]
    // fn branch_name_has_at_least_one_slash() {
    //     assert!(has_at_least_one_slash(sanitize_git_ref_onelevel(
    //         "refs/heads/master"
    //     )))
    // }

    // They cannot have two consecutive dots .. anywhere.
    test_does_not_violate_branch_naming_rule!(
        branch_name_does_not_contain_two_consecutive_dots,
        proptest_branch_name_does_not_contain_two_consecutive_dots,
        |branch_name: &str| -> bool { branch_name.contains("..") },
        "refs/heads/master..foo"
    );

    // They cannot have ASCII control characters (i.e. bytes whose values are lower than \040, or \177 DEL).
    // The input embeds U+001F (octal \037), the highest control character below space.
    test_does_not_violate_branch_naming_rule!(
        branch_name_does_not_contain_a_control_character,
        proptest_branch_name_does_not_contain_a_control_character,
        |branch_name: &str| -> bool { branch_name.contains(|c: char| c.is_ascii_control()) },
        "/refs/heads/master\u{1f}foo"
    );

    // They cannot have space anywhere.
    test_does_not_violate_branch_naming_rule!(
        branch_name_does_not_contain_a_space,
        proptest_branch_name_does_not_contain_a_space,
        |branch_name: &str| -> bool { branch_name.contains(char::is_whitespace) },
        "/refs/heads/master foo"
    );

    // They cannot have tilde ~ anywhere.
    test_does_not_violate_branch_naming_rule!(
        branch_name_does_not_contain_a_tilde,
        proptest_branch_name_does_not_contain_a_tilde,
        |branch_name: &str| -> bool { branch_name.contains('~') },
        "/refs/heads/master~foo"
    );

    // They cannot have caret ^ anywhere.
    test_does_not_violate_branch_naming_rule!(
        branch_name_does_not_contain_a_carat,
        proptest_branch_name_does_not_contain_a_carat,
        |branch_name: &str| -> bool { branch_name.contains('^') },
        "/refs/heads/master^foo"
    );

    // They cannot have colon : anywhere.
    test_does_not_violate_branch_naming_rule!(
        branch_name_does_not_contain_a_colon,
        proptest_branch_name_does_not_contain_a_colon,
        |branch_name: &str| -> bool { branch_name.contains(':') },
        "/refs/heads/master:foo"
    );

    // They cannot have question-mark ? anywhere. See the --refspec-pattern option below for an exception to this rule.
    test_does_not_violate_branch_naming_rule!(
        branch_name_does_not_contain_a_question_mark,
        proptest_branch_name_does_not_contain_a_question_mark,
        |branch_name: &str| -> bool { branch_name.contains('?') },
        "/refs/heads/master?foo"
    );

    // They cannot have asterisk * anywhere. See the --refspec-pattern option below for an exception to this rule.
    test_does_not_violate_branch_naming_rule!(
        branch_name_does_not_contain_an_asterisk,
        proptest_branch_name_does_not_contain_an_asterisk,
        |branch_name: &str| -> bool { branch_name.contains('*') },
        "/refs/heads/master*foo"
    );

    // They cannot have open bracket [ anywhere. See the --refspec-pattern option below for an exception to this rule.
    test_does_not_violate_branch_naming_rule!(
        branch_name_does_not_contain_an_open_bracket,
        proptest_branch_name_does_not_contain_an_open_bracket,
        |branch_name: &str| -> bool { branch_name.contains('[') },
        "/refs/heads/master[foo"
    );

    // They cannot begin with a slash (/) (see the --normalize option for an exception to this rule)
    test_does_not_violate_branch_naming_rule!(
        branch_name_does_not_begin_with_a_forward_slash,
        proptest_branch_name_does_not_begin_with_a_forward_slash,
        |branch_name: &str| -> bool { branch_name.starts_with('/') },
        "/refs/heads/master"
    );

    // They cannot end with a slash (/) (see the --normalize option for an exception to this rule)
    test_does_not_violate_branch_naming_rule!(
        branch_name_does_not_end_with_a_forward_slash,
        proptest_branch_name_does_not_end_with_a_forward_slash,
        |branch_name: &str| -> bool { branch_name.ends_with('/') },
        "refs/heads/master/"
    );

    // They cannot contain multiple consecutive slashes (see the --normalize option for an exception to this rule)
    test_does_not_violate_branch_naming_rule!(
        branch_name_does_not_contain_consecutive_forward_slashes,
        proptest_branch_name_does_not_contain_consecutive_forward_slashes,
        |branch_name: &str| -> bool { branch_name.contains("//") },
        "refs/heads/master//all-right"
    );

    // Same rule as above, exercised with runs longer than two slashes.
    test_does_not_violate_branch_naming_rule!(
        branch_name_does_not_contain_multiple_consecutive_forward_slashes,
        proptest_branch_name_does_not_contain_multiple_consecutive_forward_slashes,
        |branch_name: &str| -> bool { branch_name.contains("//") },
        "refs/heads/master///all////right"
    );

    // They cannot end with a dot .
    test_does_not_violate_branch_naming_rule!(
        branch_name_does_not_end_with_dot,
        proptest_branch_name_does_not_end_with_dot,
        |branch_name: &str| -> bool { branch_name.ends_with('.') },
        "refs/heads/master."
    );

    // They cannot contain a sequence @{.
    test_does_not_violate_branch_naming_rule!(
        branch_name_does_not_contain_ampersand_open_brace,
        proptest_branch_name_does_not_contain_ampersand_open_brace,
        |branch_name: &str| -> bool { branch_name.contains("@{") },
        "refs/heads/master-@{-branch"
    );

    // FIXME: this implementation is too restrictive but I'm not exactly sure of the rules right now.
    // Happy to widen this up if I get more clarity and feel confident we'll avoid false-positives.
    // They cannot be the single character @.
    test_does_not_violate_branch_naming_rule!(
        branch_name_does_not_contain_ampersand,
        proptest_branch_name_does_not_contain_ampersand,
        |branch_name: &str| -> bool { branch_name.contains('@') },
        "refs/heads/master-@-branch"
    );

    // They cannot contain a \.
    test_does_not_violate_branch_naming_rule!(
        branch_name_does_not_contain_backslash,
        proptest_branch_name_does_not_contain_backslash,
        |branch_name: &str| -> bool { branch_name.contains('\\') },
        r"refs/heads/master-\-branch"
    );
}