Skip to main content

cloudiful_redactor/
input.rs

1use serde::{Deserialize, Serialize};
2use std::ops::Range;
3
/// Kind of input passed to `redactable_ranges`; selects how redactable spans are located.
///
/// Serialized through serde with `snake_case` variant names (`text`, `git_diff`).
/// `Text` is the `Default` variant.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum InputKind {
    /// Plain text: the whole input is treated as a single redactable range.
    #[default]
    Text,
    /// Diff text: only the content of hunk lines is treated as redactable.
    GitDiff,
}
11
/// A span of the input that may be redacted, together with the file it is attributed to.
#[derive(Debug, Clone)]
pub(crate) struct RedactableRange {
    /// Half-open byte range into the input text.
    pub range: Range<usize>,
    /// Path associated with the span (the caller-supplied source path in text mode,
    /// the path read from diff headers in diff mode); `None` when no path is known.
    pub file_path: Option<String>,
}
17
18pub(crate) fn redactable_ranges(
19    text: &str,
20    input_kind: InputKind,
21    source_path: Option<&str>,
22) -> Vec<RedactableRange> {
23    match input_kind {
24        InputKind::Text => {
25            let file_path = source_path.map(|s| s.to_string());
26            vec![RedactableRange {
27                range: 0..text.len(),
28                file_path,
29            }]
30        }
31        InputKind::GitDiff => git_diff_redactable_ranges(text),
32    }
33}
34
35fn git_diff_redactable_ranges(text: &str) -> Vec<RedactableRange> {
36    let mut ranges = Vec::new();
37    let mut offset = 0;
38    let mut in_hunk = false;
39    let mut in_binary_patch = false;
40    let mut current_file_path: Option<String> = None;
41
42    for line in text.split_inclusive('\n') {
43        if line.starts_with("diff --git ") {
44            in_hunk = false;
45            in_binary_patch = false;
46            current_file_path = parse_diff_git_path(line);
47            offset += line.len();
48            continue;
49        }
50
51        if line.starts_with("+++ ") || line.starts_with("--- ") {
52            current_file_path = parse_diff_path_line(line)
53                .or(current_file_path.take())
54                .or(current_file_path.clone());
55            if line.starts_with("+++ ") {
56                current_file_path = parse_diff_path_line(line).or(current_file_path);
57            }
58            offset += line.len();
59            continue;
60        }
61
62        if line.starts_with("GIT binary patch") || line.starts_with("Binary files ") {
63            in_hunk = false;
64            in_binary_patch = true;
65            offset += line.len();
66            continue;
67        }
68
69        if in_binary_patch {
70            offset += line.len();
71            continue;
72        }
73
74        if line.starts_with("@@") {
75            in_hunk = true;
76            offset += line.len();
77            continue;
78        }
79
80        if !in_hunk {
81            offset += line.len();
82            continue;
83        }
84
85        if line.starts_with("\\ No newline at end of file") {
86            offset += line.len();
87            continue;
88        }
89
90        if matches!(
91            line.as_bytes().first(),
92            Some(b'+') | Some(b'-') | Some(b' ')
93        ) && line.len() > 1
94        {
95            ranges.push(RedactableRange {
96                range: (offset + 1)..(offset + line.len()),
97                file_path: current_file_path.clone(),
98            });
99        }
100
101        offset += line.len();
102    }
103
104    ranges
105}
106
/// Extracts the file path from a `diff --git <old> <new>` header line.
///
/// Returns `None` when `line` is not such a header or names nothing after the
/// prefix. A leading `a/` (old side) or `b/` (new side) is dropped.
///
/// Paths may contain spaces, so the header cannot simply be cut at the first
/// blank: the line is first searched for the split at which both sides name the
/// same file (the common, non-rename case). Only when no such split exists
/// (e.g. a rename) does it fall back to the first whitespace-delimited token,
/// i.e. the old-side name.
fn parse_diff_git_path(line: &str) -> Option<String> {
    let rest = line.strip_prefix("diff --git ")?;
    let names = rest.trim_end_matches(|c| c == '\n' || c == '\r');

    let same_on_both_sides = names.match_indices(' ').find_map(|(split, _)| {
        let old_side = &names[..split];
        let new_side = &names[split + 1..];
        let old_name = old_side.strip_prefix("a/").unwrap_or(old_side);
        let new_name = new_side.strip_prefix("b/").unwrap_or(new_side);
        (!old_name.is_empty() && old_name == new_name).then(|| old_name.to_string())
    });
    if same_on_both_sides.is_some() {
        return same_on_both_sides;
    }

    let first_token = rest.split_whitespace().next()?;
    Some(
        first_token
            .strip_prefix("a/")
            .unwrap_or(first_token)
            .to_string(),
    )
}
113
/// Extracts the file path from a `--- <path>` or `+++ <path>` diff header line.
///
/// Returns `None` when `line` is not such a header, when the path is empty, or
/// when it is `/dev/null` (the side of an added or deleted file that does not exist).
/// A leading `b/` or `a/` prefix is dropped and surrounding whitespace is trimmed.
///
/// Anything after the first tab is discarded: `diff -u` appends a tab-separated
/// timestamp to the file name, and without this cut the timestamp would become
/// part of the path and `/dev/null` would go unrecognised.
fn parse_diff_path_line(line: &str) -> Option<String> {
    let rest = line
        .strip_prefix("+++ ")
        .or_else(|| line.strip_prefix("--- "))?;

    // `split` always yields at least one item, so the fallback is never taken.
    let name = rest.split('\t').next().unwrap_or(rest);
    let name = name
        .strip_prefix("b/")
        .or_else(|| name.strip_prefix("a/"))
        .unwrap_or(name)
        .trim();

    if name.is_empty() || name == "/dev/null" {
        None
    } else {
        Some(name.to_string())
    }
}
129
#[cfg(test)]
mod tests {
    use super::{InputKind, redactable_ranges};

    /// In diff mode only hunk lines are covered, each without its marker column.
    #[test]
    fn git_diff_ranges_only_cover_hunk_lines() {
        let diff = concat!(
            "diff --git a/config.yml b/config.yml\n",
            "index 1111111..2222222 100644\n",
            "--- a/config.yml\n",
            "+++ b/config.yml\n",
            "@@ -1,2 +1,3 @@\n",
            "-old_secret=abc123\n",
            "+new_secret=def456\n",
            " unchanged=value\n",
        );
        let expected = vec![
            "old_secret=abc123\n",
            "new_secret=def456\n",
            "unchanged=value\n",
        ];

        let mut covered: Vec<&str> = Vec::new();
        for entry in redactable_ranges(diff, InputKind::GitDiff, None) {
            covered.push(&diff[entry.range]);
        }

        assert_eq!(covered, expected);
    }

    /// In text mode a single range spans the whole input.
    #[test]
    fn text_mode_covers_entire_input() {
        let input = "plain text";
        let found = redactable_ranges(input, InputKind::Text, None);

        assert_eq!(found.len(), 1);
        let only = &found[0];
        assert_eq!(&input[only.range.clone()], input);
    }

    /// In text mode the caller-supplied source path is attached to the range.
    #[test]
    fn text_mode_with_source_path() {
        let found = redactable_ranges("content", InputKind::Text, Some("secrets/env"));

        assert_eq!(found.len(), 1);
        let attached = found[0].file_path.as_deref();
        assert_eq!(attached, Some("secrets/env"));
    }

    /// In diff mode the path comes from the diff headers, without the a/ or b/ prefix.
    #[test]
    fn git_diff_extracts_file_path_from_headers() {
        let diff = concat!(
            "diff --git a/app/config.yml b/app/config.yml\n",
            "index 1111111..2222222 100644\n",
            "--- a/app/config.yml\n",
            "+++ b/app/config.yml\n",
            "@@ -1,1 +1,1 @@\n",
            "-old_secret=abc123\n",
        );

        let found = redactable_ranges(diff, InputKind::GitDiff, None);

        assert_eq!(found.len(), 1);
        let attached = found[0].file_path.as_deref();
        assert_eq!(attached, Some("app/config.yml"));
    }
}