Skip to main content

git_checks/
invalid_utf8.rs

1// Copyright Kitware, Inc.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9use derive_builder::Builder;
10use git_checks_core::impl_prelude::*;
11
12use std::char::REPLACEMENT_CHARACTER;
13
14/// A check which denies commits which modify files containing special characters.
15///
16/// Files may be marked as binary by unsetting the `text` attribute.
17#[derive(Builder, Debug, Default, Clone, Copy)]
18#[builder(field(private))]
19pub struct InvalidUtf8 {}
20
21impl InvalidUtf8 {
22    /// Create a new builder.
23    pub fn builder() -> InvalidUtf8Builder {
24        Default::default()
25    }
26}
27
28impl ContentCheck for InvalidUtf8 {
29    fn name(&self) -> &str {
30        "invalid-utf8"
31    }
32
33    fn check(
34        &self,
35        ctx: &CheckGitContext,
36        content: &dyn Content,
37    ) -> Result<CheckResult, Box<dyn Error>> {
38        let mut result = CheckResult::new();
39
40        for diff in content.diffs() {
41            match diff.status {
42                StatusChange::Added | StatusChange::Modified(_) => (),
43                _ => continue,
44            }
45
46            let diff_attr = ctx.check_attr("diff", diff.name.as_path())?;
47            if let AttributeState::Unset = diff_attr {
48                // Binary files should not be handled here.
49                continue;
50            }
51
52            let patch = match content.path_diff(&diff.name) {
53                Ok(s) => s,
54                Err(err) => {
55                    result.add_alert(
56                        format!(
57                            "{}failed to get the diff for file `{}`: {err}.",
58                            commit_prefix(content),
59                            diff.name,
60                        ),
61                        true,
62                    );
63                    continue;
64                },
65            };
66
67            for line in patch.lines().filter(|line| line.starts_with('+')) {
68                if line.contains(REPLACEMENT_CHARACTER) {
69                    // Escape instances of backticks and backslashes.
70                    let safe_line = line[1..].replace('\\', "\\\\").replace('`', "\\`");
71                    result.add_error(format!(
72                        "{}invalid utf-8 sequence added in `{}`: `{safe_line}`.",
73                        commit_prefix_str(content, "not allowed;"),
74                        diff.name,
75                    ));
76                }
77            }
78        }
79
80        Ok(result)
81    }
82}
83
#[cfg(feature = "config")]
pub(crate) mod config {
    use git_checks_config::{register_checks, CommitCheckConfig, IntoCheck, TopicCheckConfig};
    use serde::Deserialize;
    #[cfg(test)]
    use serde_json::json;

    use crate::InvalidUtf8;

    /// Configuration for the `InvalidUtf8` check.
    ///
    /// The check takes no options; an empty object is a valid configuration.
    ///
    /// This check is registered as a commit check with the name `"invalid_utf8"` and a topic check
    /// with the name `"invalid_utf8/topic"`.
    #[derive(Deserialize, Debug)]
    pub struct InvalidUtf8Config {}

    impl IntoCheck for InvalidUtf8Config {
        type Check = InvalidUtf8;

        /// Produce the check; with nothing to configure, this is always the default instance.
        fn into_check(self) -> Self::Check {
            InvalidUtf8::default()
        }
    }

    register_checks! {
        InvalidUtf8Config {
            "invalid_utf8" => CommitCheckConfig,
            "invalid_utf8/topic" => TopicCheckConfig,
        },
    }

    /// An empty JSON object must deserialize and convert into a check.
    #[test]
    fn test_invalid_utf8_config_empty() {
        let config = serde_json::from_value::<InvalidUtf8Config>(json!({})).unwrap();

        let _ = config.into_check();
    }
}
125
#[cfg(test)]
mod tests {
    use git_checks_core::{Check, TopicCheck};

    use crate::test::*;
    use crate::InvalidUtf8;

    // Commit hashes used as fixtures by the tests below.
    const BAD_TOPIC: &str = "cf16b71a21023320ffab7b3f7673dc62f33e5022";
    const DELETE_TOPIC: &str = "0f5b216e14e1af3e8e8b5bbb8116c3a89a35f73f";
    const FIX_TOPIC: &str = "e8763477e9ebef4a61d130724cee9e29b13f857e";
    const BINARY_COMMIT: &str = "2b10dde72c3d11e8d3ce14417cefab810f545eeb";

    /// The builder succeeds without any fields being set.
    #[test]
    fn test_invalid_utf8_builder_default() {
        let built = InvalidUtf8::builder().build();
        assert!(built.is_ok());
    }

    /// The check reports its name through the `Check` trait.
    #[test]
    fn test_invalid_utf8_name_commit() {
        assert_eq!(Check::name(&InvalidUtf8::default()), "invalid-utf8");
    }

    /// The check reports the same name through the `TopicCheck` trait.
    #[test]
    fn test_invalid_utf8_name_topic() {
        assert_eq!(TopicCheck::name(&InvalidUtf8::default()), "invalid-utf8");
    }

    /// An added line with an invalid sequence is reported with the commit prefix.
    #[test]
    fn test_invalid_utf8() {
        let result = run_check("test_invalid_utf8", BAD_TOPIC, InvalidUtf8::default());
        test_result_errors(result, &[
            "commit cf16b71a21023320ffab7b3f7673dc62f33e5022 not allowed; invalid utf-8 sequence \
             added in `invalid-utf8`: `This file contains an invalid utf-8 sequence: \u{fffd}`.",
        ]);
    }

    /// As a topic check, the same line is reported without the commit prefix.
    #[test]
    fn test_invalid_utf8_topic() {
        let result = run_topic_check(
            "test_invalid_utf8_topic",
            BAD_TOPIC,
            InvalidUtf8::default(),
        );
        test_result_errors(result, &[
            "invalid utf-8 sequence added in `invalid-utf8`: `This file contains an invalid utf-8 \
             sequence: \u{fffd}`.",
        ]);
    }

    /// Checking `DELETE_TOPIC` against `BAD_TOPIC` produces no messages.
    #[test]
    fn test_invalid_utf8_delete_file() {
        let check = InvalidUtf8::default();
        let conf = make_check_conf(&check);

        test_result_ok(test_check_base(
            "test_invalid_utf8_delete_file",
            DELETE_TOPIC,
            BAD_TOPIC,
            &conf,
        ));
    }

    /// The topic check of `DELETE_TOPIC` produces no messages.
    #[test]
    fn test_invalid_utf8_delete_file_topic() {
        test_result_ok(run_topic_check(
            "test_invalid_utf8_delete_file_topic",
            DELETE_TOPIC,
            InvalidUtf8::default(),
        ));
    }

    /// The topic check of `FIX_TOPIC` passes.
    #[test]
    fn test_invalid_utf8_topic_fixed() {
        run_topic_check_ok(
            "test_invalid_utf8_topic_fixed",
            FIX_TOPIC,
            InvalidUtf8::default(),
        );
    }

    /// The commit check of `BINARY_COMMIT` passes.
    #[test]
    fn test_invalid_utf8_ignore_by_binary() {
        run_check_ok(
            "test_invalid_utf8_ignore_by_binary",
            BINARY_COMMIT,
            InvalidUtf8::default(),
        );
    }

    /// The topic check of `BINARY_COMMIT` passes.
    #[test]
    fn test_invalid_utf8_ignore_by_binary_topic() {
        run_topic_check_ok(
            "test_invalid_utf8_ignore_by_binary_topic",
            BINARY_COMMIT,
            InvalidUtf8::default(),
        );
    }
}