git_checks/
invalid_utf8.rs

1// Copyright Kitware, Inc.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9use derive_builder::Builder;
10use git_checks_core::impl_prelude::*;
11
12use std::char::REPLACEMENT_CHARACTER;
13
14/// A check which denies commits which modify files containing special characters.
15///
16/// Files may be marked as binary by unsetting the `text` attribute.
17#[derive(Builder, Debug, Default, Clone, Copy)]
18#[builder(field(private))]
19pub struct InvalidUtf8 {}
20
21impl InvalidUtf8 {
22    /// Create a new builder.
23    pub fn builder() -> InvalidUtf8Builder {
24        Default::default()
25    }
26}
27
28impl ContentCheck for InvalidUtf8 {
29    fn name(&self) -> &str {
30        "invalid-utf8"
31    }
32
33    fn check(
34        &self,
35        ctx: &CheckGitContext,
36        content: &dyn Content,
37    ) -> Result<CheckResult, Box<dyn Error>> {
38        let mut result = CheckResult::new();
39
40        for diff in content.diffs() {
41            match diff.status {
42                StatusChange::Added | StatusChange::Modified(_) => (),
43                _ => continue,
44            }
45
46            let diff_attr = ctx.check_attr("diff", diff.name.as_path())?;
47            if let AttributeState::Unset = diff_attr {
48                // Binary files should not be handled here.
49                continue;
50            }
51
52            let patch = match content.path_diff(&diff.name) {
53                Ok(s) => s,
54                Err(err) => {
55                    result.add_alert(
56                        format!(
57                            "{}failed to get the diff for file `{}`: {}.",
58                            commit_prefix(content),
59                            diff.name,
60                            err,
61                        ),
62                        true,
63                    );
64                    continue;
65                },
66            };
67
68            for line in patch.lines().filter(|line| line.starts_with('+')) {
69                if line.contains(REPLACEMENT_CHARACTER) {
70                    // Escape instances of backticks and backslashes.
71                    let safe_line = line[1..].replace('\\', "\\\\").replace('`', "\\`");
72                    result.add_error(format!(
73                        "{}invalid utf-8 sequence added in `{}`: `{}`.",
74                        commit_prefix_str(content, "not allowed;"),
75                        diff.name,
76                        safe_line,
77                    ));
78                }
79            }
80        }
81
82        Ok(result)
83    }
84}
85
#[cfg(feature = "config")]
pub(crate) mod config {
    use git_checks_config::{register_checks, CommitCheckConfig, IntoCheck, TopicCheckConfig};
    use serde::Deserialize;
    #[cfg(test)]
    use serde_json::json;

    use crate::InvalidUtf8;

    /// Configuration for the `InvalidUtf8` check.
    ///
    /// The check has no options; an empty JSON object deserializes into this structure.
    ///
    /// The check is registered as a commit check named `"invalid_utf8"` and as a topic check
    /// named `"invalid_utf8/topic"`.
    #[derive(Debug, Deserialize)]
    pub struct InvalidUtf8Config {}

    impl IntoCheck for InvalidUtf8Config {
        type Check = InvalidUtf8;

        /// Create the check; every configuration yields the default `InvalidUtf8`.
        fn into_check(self) -> Self::Check {
            InvalidUtf8::default()
        }
    }

    register_checks! {
        InvalidUtf8Config {
            "invalid_utf8" => CommitCheckConfig,
            "invalid_utf8/topic" => TopicCheckConfig,
        },
    }

    #[test]
    fn test_invalid_utf8_config_empty() {
        // An empty object must deserialize and convert into a check.
        let config: InvalidUtf8Config = serde_json::from_value(json!({})).unwrap();

        let _ = config.into_check();
    }
}
127
#[cfg(test)]
mod tests {
    use git_checks_core::{Check, TopicCheck};

    use crate::test::*;
    use crate::InvalidUtf8;

    // Commit expected to be rejected for adding an invalid UTF-8 sequence.
    const BAD_TOPIC: &str = "cf16b71a21023320ffab7b3f7673dc62f33e5022";
    // Presumably removes the offending file again (checked on top of `BAD_TOPIC`) -- TODO confirm.
    const DELETE_TOPIC: &str = "0f5b216e14e1af3e8e8b5bbb8116c3a89a35f73f";
    // Topic which is expected to pass the topic check.
    const FIX_TOPIC: &str = "e8763477e9ebef4a61d130724cee9e29b13f857e";
    // Commit expected to pass; presumably its file is marked as binary -- TODO confirm.
    const BINARY_COMMIT: &str = "2b10dde72c3d11e8d3ce14417cefab810f545eeb";

    #[test]
    fn test_invalid_utf8_builder_default() {
        let built = InvalidUtf8::builder().build();
        assert!(built.is_ok());
    }

    #[test]
    fn test_invalid_utf8_name_commit() {
        assert_eq!(Check::name(&InvalidUtf8::default()), "invalid-utf8");
    }

    #[test]
    fn test_invalid_utf8_name_topic() {
        assert_eq!(TopicCheck::name(&InvalidUtf8::default()), "invalid-utf8");
    }

    #[test]
    fn test_invalid_utf8() {
        let result = run_check("test_invalid_utf8", BAD_TOPIC, InvalidUtf8::default());
        test_result_errors(result, &[
            "commit cf16b71a21023320ffab7b3f7673dc62f33e5022 not allowed; invalid utf-8 sequence \
             added in `invalid-utf8`: `This file contains an invalid utf-8 sequence: \u{fffd}`.",
        ]);
    }

    #[test]
    fn test_invalid_utf8_topic() {
        let result = run_topic_check(
            "test_invalid_utf8_topic",
            BAD_TOPIC,
            InvalidUtf8::default(),
        );
        test_result_errors(result, &[
            "invalid utf-8 sequence added in `invalid-utf8`: `This file contains an invalid utf-8 \
             sequence: \u{fffd}`.",
        ]);
    }

    #[test]
    fn test_invalid_utf8_delete_file() {
        // The configuration borrows the check, so the check needs its own binding.
        let check = InvalidUtf8::default();
        let conf = make_check_conf(&check);

        let result = test_check_base(
            "test_invalid_utf8_delete_file",
            DELETE_TOPIC,
            BAD_TOPIC,
            &conf,
        );
        test_result_ok(result);
    }

    #[test]
    fn test_invalid_utf8_delete_file_topic() {
        let result = run_topic_check(
            "test_invalid_utf8_delete_file_topic",
            DELETE_TOPIC,
            InvalidUtf8::default(),
        );
        test_result_ok(result);
    }

    #[test]
    fn test_invalid_utf8_topic_fixed() {
        run_topic_check_ok(
            "test_invalid_utf8_topic_fixed",
            FIX_TOPIC,
            InvalidUtf8::default(),
        );
    }

    #[test]
    fn test_invalid_utf8_ignore_by_binary() {
        run_check_ok(
            "test_invalid_utf8_ignore_by_binary",
            BINARY_COMMIT,
            InvalidUtf8::default(),
        );
    }

    #[test]
    fn test_invalid_utf8_ignore_by_binary_topic() {
        run_topic_check_ok(
            "test_invalid_utf8_ignore_by_binary_topic",
            BINARY_COMMIT,
            InvalidUtf8::default(),
        );
    }
}