Skip to main content

zerv/utils/
sanitize.rs

/// Selects the kind of output [`Sanitizer::sanitize`] produces.
#[derive(Debug, Clone, PartialEq)]
pub enum SanitizeTarget {
    /// Clean string for version identifiers (alphanumeric + separator)
    Str,
    /// Unsigned integer string: input that is not purely ASCII digits
    /// (after trimming surrounding whitespace) yields an empty string
    UInt,
}

/// Configurable sanitizer that turns arbitrary text into either a cleaned
/// identifier string or an unsigned-integer string, depending on `target`.
#[derive(Debug, Clone)]
pub struct Sanitizer {
    /// What type of output to produce
    pub target: SanitizeTarget,
    /// Replace each run of non-alphanumeric characters with this separator,
    /// or `None` to keep the input characters unchanged (Str target only)
    pub separator: Option<String>,
    /// Convert to lowercase (Str target only)
    pub lowercase: bool,
    /// Keep leading zeros in numeric segments
    pub keep_zeros: bool,
    /// Maximum length in bytes (Str target only). Longer results are
    /// truncated; the cut backs up to the nearest character boundary so a
    /// multi-byte character is never split.
    pub max_length: Option<usize>,
}

impl Sanitizer {
    /// Apply sanitization to `input`, dispatching on the configured target.
    pub fn sanitize(&self, input: &str) -> String {
        match self.target {
            SanitizeTarget::Str => self.sanitize_to_string(input),
            SanitizeTarget::UInt => self.sanitize_to_integer(input),
        }
    }

    /// PEP440 local string sanitization: lowercase, dots, no leading zeros
    pub fn pep440_local_str() -> Self {
        Self {
            target: SanitizeTarget::Str,
            separator: Some(".".to_string()),
            lowercase: true,
            keep_zeros: false,
            max_length: None,
        }
    }

    /// SemVer string sanitization: preserve case, dots, no leading zeros
    pub fn semver_str() -> Self {
        Self {
            target: SanitizeTarget::Str,
            separator: Some(".".to_string()),
            lowercase: false,
            keep_zeros: false,
            max_length: None,
        }
    }

    /// Unsigned integer sanitizer: accepts only all-digit input and strips
    /// leading zeros; anything else sanitizes to an empty string.
    pub fn uint() -> Self {
        Self {
            target: SanitizeTarget::UInt,
            separator: None,
            lowercase: false,
            keep_zeros: false,
            max_length: None,
        }
    }

    /// Custom string sanitizer
    ///
    /// * `separator` - replacement for runs of non-alphanumeric characters,
    ///   or `None` to leave the characters untouched
    /// * `lowercase` - lowercase the input before any other processing
    /// * `keep_zeros` - keep leading zeros in all-digit segments
    /// * `max_length` - optional upper bound on the result length in bytes
    pub fn str(
        separator: Option<&str>,
        lowercase: bool,
        keep_zeros: bool,
        max_length: Option<usize>,
    ) -> Self {
        Self {
            target: SanitizeTarget::Str,
            separator: separator.map(str::to_string),
            lowercase,
            keep_zeros,
            max_length,
        }
    }

    /// Key sanitizer - for sanitizing keys (lowercase, dots, no leading zeros)
    pub fn key() -> Self {
        Self {
            target: SanitizeTarget::Str,
            separator: Some(".".to_string()),
            lowercase: true,
            keep_zeros: false,
            max_length: None,
        }
    }

    /// Sanitize to clean string.
    ///
    /// Steps, in order: optional lowercasing, separator substitution,
    /// optional leading-zero removal, optional truncation, and finally
    /// trimming of separators left at either end.
    fn sanitize_to_string(&self, input: &str) -> String {
        let cased = if self.lowercase {
            input.to_lowercase()
        } else {
            input.to_string()
        };

        let mut result = self.replace_non_alphanumeric(&cased);

        if !self.keep_zeros {
            result = self.remove_leading_zeros(&result);
        }

        if let Some(max_len) = self.max_length {
            // `String::truncate` panics when the index splits a multi-byte
            // character, so the cut is moved back to a character boundary.
            Self::truncate_at_char_boundary(&mut result, max_len);
        }

        // Trimming happens after truncation because the cut may land right
        // after a separator and leave it dangling at the end.
        match self.separator.as_deref() {
            Some(sep) => result
                .trim_start_matches(sep)
                .trim_end_matches(sep)
                .to_string(),
            None => result,
        }
    }

    /// Produce an unsigned-integer string from `input`.
    ///
    /// Surrounding whitespace is ignored. Only input consisting purely of
    /// ASCII digits is accepted; anything else (including the empty string
    /// and signed numbers) returns an empty string.
    fn sanitize_to_integer(&self, input: &str) -> String {
        let trimmed = input.trim();

        if trimmed.is_empty() || !trimmed.chars().all(|c| c.is_ascii_digit()) {
            return String::new();
        }

        if self.keep_zeros {
            trimmed.to_string()
        } else {
            Self::strip_leading_zeros(trimmed).to_string()
        }
    }

    /// Replace non-alphanumeric characters with separator or keep unchanged.
    ///
    /// Each run of consecutive non-alphanumeric characters collapses into a
    /// single separator, and trailing separators are removed. Without a
    /// configured separator the input is returned as-is.
    fn replace_non_alphanumeric(&self, input: &str) -> String {
        let Some(sep) = self.separator.as_deref() else {
            return input.to_string();
        };

        let mut result = String::with_capacity(input.len());
        let mut last_was_sep = false;

        for ch in input.chars() {
            if ch.is_alphanumeric() {
                result.push(ch);
                last_was_sep = false;
            } else if !last_was_sep {
                result.push_str(sep);
                last_was_sep = true;
            }
        }

        // Drop trailing separators in place instead of allocating a copy.
        let kept = result.trim_end_matches(sep).len();
        result.truncate(kept);
        result
    }

    /// Remove leading zeros from numeric segments.
    ///
    /// With a separator, every separator-delimited segment is processed
    /// independently; without one, the whole input is a single segment.
    fn remove_leading_zeros(&self, input: &str) -> String {
        let Some(sep) = self.separator.as_deref() else {
            return self.remove_leading_zeros_from_segment(input);
        };

        if input.is_empty() {
            return String::new();
        }

        input
            .split(sep)
            .map(|segment| self.remove_leading_zeros_from_segment(segment))
            .collect::<Vec<_>>()
            .join(sep)
    }

    /// Strip leading zeros from `segment` when it consists solely of ASCII
    /// digits; any other segment (including an empty one) is returned
    /// unchanged.
    fn remove_leading_zeros_from_segment(&self, segment: &str) -> String {
        if !segment.is_empty() && segment.chars().all(|c| c.is_ascii_digit()) {
            Self::strip_leading_zeros(segment).to_string()
        } else {
            segment.to_string()
        }
    }

    /// Strip leading `'0'` characters from a non-empty all-digit string,
    /// returning `"0"` when nothing else remains. Callers are responsible
    /// for checking that `digits` is non-empty and all digits.
    fn strip_leading_zeros(digits: &str) -> &str {
        let stripped = digits.trim_start_matches('0');
        if stripped.is_empty() { "0" } else { stripped }
    }

    /// Shorten `text` to at most `max_len` bytes without splitting a
    /// multi-byte character.
    fn truncate_at_char_boundary(text: &mut String, max_len: usize) {
        if text.len() <= max_len {
            return;
        }

        // Index 0 is always a character boundary, so this cannot underflow.
        let mut cut = max_len;
        while !text.is_char_boundary(cut) {
            cut -= 1;
        }
        text.truncate(cut);
    }
}
193
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// Feeds every `(input, expected)` pair through `sanitizer` and checks
    /// the sanitized output.
    fn check(sanitizer: &Sanitizer, cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(sanitizer.sanitize(input), expected, "input: {input:?}");
        }
    }

    #[test]
    fn test_semver_str_sanitization() {
        check(
            &Sanitizer::semver_str(),
            &[
                ("feature/test-branch", "feature.test.branch"),
                ("Build-ID-0051", "Build.ID.51"),
                ("test@#$%branch", "test.branch"),
                ("Feature/API-v2", "Feature.API.v2"),
                ("build-id-0051", "build.id.51"),
                ("123", "123"),
                ("000045445", "45445"),
            ],
        );
    }

    #[test]
    fn test_pep440_local_str_sanitization() {
        check(
            &Sanitizer::pep440_local_str(),
            &[
                ("Feature/API-v2", "feature.api.v2"),
                ("Build-ID-0051", "build.id.51"),
                ("TEST_BRANCH", "test.branch"),
                ("000045445", "45445"),
                ("123", "123"),
                ("0", "0"),
                ("999999", "999999"),
                ("  42  ", "42"),
                ("abc123", "abc123"),
                ("123abc", "123abc"),
                ("v1.2.3", "v1.2.3"),
            ],
        );
    }

    #[test]
    fn test_uint_extraction() {
        check(
            &Sanitizer::uint(),
            &[
                ("123", "123"),
                ("0051", "51"),
                ("0000", "0"),
                ("00123", "123"),
                ("abc123def456", ""),
                ("no-digits", ""),
                ("abc", ""),
                ("", ""),
                ("-123", ""),
            ],
        );
    }

    #[test]
    fn test_custom_config() {
        // Underscore separator, lowercase, zeros kept, capped at 10.
        check(
            &Sanitizer::str(Some("_"), true, true, Some(10)),
            &[
                ("Feature/Test-0051", "feature_te"),
                ("Build-ID-0051", "build_id_0"),
            ],
        );
    }

    #[test]
    fn test_leading_zeros() {
        let stripping = Sanitizer::str(Some("."), false, false, None);
        let keeping = Sanitizer::str(Some("."), false, true, None);

        check(&stripping, &[("test-0051", "test.51")]);
        check(&keeping, &[("test-0051", "test.0051")]);
        check(&stripping, &[("test-0000", "test.0")]);
    }

    #[test]
    fn test_max_length() {
        check(
            &Sanitizer::str(Some("."), false, false, Some(10)),
            &[("very-long-branch-name", "very.long")],
        );
    }

    #[test]
    fn test_edge_cases() {
        check(
            &Sanitizer::semver_str(),
            &[("", ""), ("123", "123"), ("@#$%", ""), ("a@#$%b", "a.b")],
        );
    }

    #[test]
    fn test_no_separator() {
        // Without a separator the special characters pass through untouched.
        check(
            &Sanitizer::str(None, false, false, None),
            &[
                ("feature/test-branch", "feature/test-branch"),
                ("Build-ID-0051", "Build-ID-0051"),
            ],
        );
    }

    #[test]
    fn test_key_sanitizer() {
        check(
            &Sanitizer::key(),
            &[
                ("custom_field", "custom.field"),
                ("feature/API-v2", "feature.api.v2"),
                ("Build-ID-0051", "build.id.51"),
                ("test@#$%branch", "test.branch"),
                ("", ""),
            ],
        );
    }

    #[rstest]
    #[case(false)]
    #[case(true)]
    fn test_separator_trimming(#[case] keep_zeros: bool) {
        let unbounded = Sanitizer::str(Some("."), false, keep_zeros, None);
        check(
            &unbounded,
            &[
                ("abc-test-branch-def", "abc.test.branch.def"),
                ("---test---", "test"),
                ("@#$test@#$", "test"),
            ],
        );

        // Truncation can leave a separator at the end; it must be trimmed.
        let bounded = Sanitizer::str(Some("."), false, keep_zeros, Some(10));
        check(&bounded, &[("very-long-branch", "very.long")]);
    }
}