Skip to main content

git_checks/
invalid_utf8.rs

1// Copyright Kitware, Inc.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9use derive_builder::Builder;
10use git_checks_core::impl_prelude::*;
11
12use std::char::REPLACEMENT_CHARACTER;
13
/// A check which denies commits that add invalid UTF-8 sequences to files.
///
/// The added lines of every added or modified file are searched for the Unicode replacement
/// character (U+FFFD); each line containing it is reported as an error.
///
/// Files may be marked as binary, and thereby skipped, by unsetting the `diff` attribute.
#[derive(Builder, Debug, Default, Clone, Copy)]
#[non_exhaustive]
#[builder(field(private))]
pub struct InvalidUtf8 {}
21
22impl InvalidUtf8 {
23    /// Create a new builder.
24    pub fn builder() -> InvalidUtf8Builder {
25        Default::default()
26    }
27}
28
29impl ContentCheck for InvalidUtf8 {
30    fn name(&self) -> &str {
31        "invalid-utf8"
32    }
33
34    fn check(
35        &self,
36        ctx: &CheckGitContext,
37        content: &dyn Content,
38    ) -> Result<CheckResult, Box<dyn Error>> {
39        let mut result = CheckResult::new();
40
41        for diff in content.diffs() {
42            match diff.status {
43                StatusChange::Added | StatusChange::Modified(_) => (),
44                _ => continue,
45            }
46
47            let diff_attr = ctx.check_attr("diff", diff.name.as_path())?;
48            if let AttributeState::Unset = diff_attr {
49                // Binary files should not be handled here.
50                continue;
51            }
52
53            let patch = match content.path_diff(&diff.name) {
54                Ok(s) => s,
55                Err(err) => {
56                    result.add_alert(
57                        format!(
58                            "{}failed to get the diff for file `{}`: {err}.",
59                            commit_prefix(content),
60                            diff.name,
61                        ),
62                        true,
63                    );
64                    continue;
65                },
66            };
67
68            for line in patch.lines().filter(|line| line.starts_with('+')) {
69                if line.contains(REPLACEMENT_CHARACTER) {
70                    // Escape instances of backticks and backslashes.
71                    let safe_line = line[1..].replace('\\', "\\\\").replace('`', "\\`");
72                    result.add_error(format!(
73                        "{}invalid utf-8 sequence added in `{}`: `{safe_line}`.",
74                        commit_prefix_str(content, "not allowed;"),
75                        diff.name,
76                    ));
77                }
78            }
79        }
80
81        Ok(result)
82    }
83}
84
#[cfg(feature = "config")]
pub(crate) mod config {
    use git_checks_config::{register_checks, CommitCheckConfig, IntoCheck, TopicCheckConfig};
    use serde::Deserialize;
    #[cfg(test)]
    use serde_json::json;

    use crate::InvalidUtf8;

    /// Configuration for the `InvalidUtf8` check.
    ///
    /// The check has no options, so the configuration is an empty object.
    ///
    /// This check is registered as a commit check with the name `"invalid_utf8"` and a topic check
    /// with the name `"invalid_utf8/topic"`.
    #[derive(Deserialize, Debug)]
    pub struct InvalidUtf8Config {}

    impl IntoCheck for InvalidUtf8Config {
        type Check = InvalidUtf8;

        /// Build the check; there is nothing to carry over from the configuration.
        fn into_check(self) -> Self::Check {
            InvalidUtf8::default()
        }
    }

    register_checks! {
        InvalidUtf8Config {
            "invalid_utf8" => CommitCheckConfig,
            "invalid_utf8/topic" => TopicCheckConfig,
        },
    }

    /// An empty JSON object must deserialize and convert into a check.
    #[test]
    fn test_invalid_utf8_config_empty() {
        let config = serde_json::from_value::<InvalidUtf8Config>(json!({})).unwrap();

        let _ = config.into_check();
    }
}
126
#[cfg(test)]
mod tests {
    use git_checks_core::{Check, TopicCheck};

    use crate::test::*;
    use crate::InvalidUtf8;

    // Commits of the test repository used below; the descriptions follow from how the tests use
    // them.
    /// Reported by the check for adding an invalid sequence to the `invalid-utf8` file.
    const BAD_TOPIC: &str = "cf16b71a21023320ffab7b3f7673dc62f33e5022";
    /// Passes the check when `BAD_TOPIC` is used as its base.
    const DELETE_TOPIC: &str = "0f5b216e14e1af3e8e8b5bbb8116c3a89a35f73f";
    /// Passes the check as a topic.
    const FIX_TOPIC: &str = "e8763477e9ebef4a61d130724cee9e29b13f857e";
    /// Passes the check both as a commit and as a topic.
    const BINARY_COMMIT: &str = "2b10dde72c3d11e8d3ce14417cefab810f545eeb";

    #[test]
    fn test_invalid_utf8_builder_default() {
        let built = InvalidUtf8::builder().build();
        assert!(built.is_ok());
    }

    #[test]
    fn test_invalid_utf8_name_commit() {
        let check = InvalidUtf8::default();
        assert_eq!(Check::name(&check), "invalid-utf8");
    }

    #[test]
    fn test_invalid_utf8_name_topic() {
        let check = InvalidUtf8::default();
        assert_eq!(TopicCheck::name(&check), "invalid-utf8");
    }

    #[test]
    fn test_invalid_utf8() {
        let result = run_check("test_invalid_utf8", BAD_TOPIC, InvalidUtf8::default());
        test_result_errors(result, &[
            "commit cf16b71a21023320ffab7b3f7673dc62f33e5022 not allowed; invalid utf-8 sequence \
             added in `invalid-utf8`: `This file contains an invalid utf-8 sequence: \u{fffd}`.",
        ]);
    }

    #[test]
    fn test_invalid_utf8_topic() {
        let result = run_topic_check("test_invalid_utf8_topic", BAD_TOPIC, InvalidUtf8::default());
        test_result_errors(result, &[
            "invalid utf-8 sequence added in `invalid-utf8`: `This file contains an invalid utf-8 \
             sequence: \u{fffd}`.",
        ]);
    }

    #[test]
    fn test_invalid_utf8_delete_file() {
        // The configuration borrows the check, so the check needs its own binding here.
        let check = InvalidUtf8::default();
        let conf = make_check_conf(&check);

        let result = test_check_base(
            "test_invalid_utf8_delete_file",
            DELETE_TOPIC,
            BAD_TOPIC,
            &conf,
        );
        test_result_ok(result);
    }

    #[test]
    fn test_invalid_utf8_delete_file_topic() {
        let result = run_topic_check(
            "test_invalid_utf8_delete_file_topic",
            DELETE_TOPIC,
            InvalidUtf8::default(),
        );
        test_result_ok(result);
    }

    #[test]
    fn test_invalid_utf8_topic_fixed() {
        run_topic_check_ok(
            "test_invalid_utf8_topic_fixed",
            FIX_TOPIC,
            InvalidUtf8::default(),
        );
    }

    #[test]
    fn test_invalid_utf8_ignore_by_binary() {
        run_check_ok(
            "test_invalid_utf8_ignore_by_binary",
            BINARY_COMMIT,
            InvalidUtf8::default(),
        );
    }

    #[test]
    fn test_invalid_utf8_ignore_by_binary_topic() {
        run_topic_check_ok(
            "test_invalid_utf8_ignore_by_binary_topic",
            BINARY_COMMIT,
            InvalidUtf8::default(),
        );
    }
}