Skip to main content

cloudiful_redactor/session/
restore.rs

1use std::collections::{BTreeSet, HashMap};
2
3use anyhow::{Result, anyhow};
4
5use crate::types::{RedactionSession, RestoreResult};
6
7pub fn restore_text_with_session(input: &str, session: &RedactionSession) -> RestoreResult {
8    let known_tokens = session
9        .entries
10        .iter()
11        .map(|entry| entry.token.clone())
12        .collect::<BTreeSet<_>>();
13    let token_map = session
14        .entries
15        .iter()
16        .map(|entry| (entry.token.as_str(), entry.original.as_str()))
17        .collect::<HashMap<_, _>>();
18
19    let mut restored_text = String::with_capacity(input.len());
20    let mut restored_count = 0;
21    let mut validation_errors = Vec::new();
22    let mut cursor = 0;
23
24    for token_range in token_like_ranges(input) {
25        restored_text.push_str(&input[cursor..token_range.start]);
26        let candidate = &input[token_range.clone()];
27        if !known_tokens.contains(candidate) {
28            validation_errors.push(format!("unknown or malformed token `{candidate}`"));
29            restored_text.push_str(candidate);
30        } else if let Some(original) = token_map.get(candidate) {
31            restored_text.push_str(original);
32            restored_count += 1;
33        } else {
34            restored_text.push_str(candidate);
35        }
36        cursor = token_range.end;
37    }
38    restored_text.push_str(&input[cursor..]);
39
40    let unresolved_tokens = token_like_ranges(&restored_text)
41        .into_iter()
42        .map(|range| restored_text[range].to_string())
43        .filter(|candidate| candidate.starts_with("__R_"))
44        .collect::<Vec<_>>();
45
46    if !unresolved_tokens.is_empty() {
47        validation_errors.extend(
48            unresolved_tokens
49                .iter()
50                .map(|candidate| format!("unresolved token remained after restore: `{candidate}`")),
51        );
52    }
53
54    RestoreResult {
55        restored_text,
56        restored_count,
57        unresolved_tokens,
58        validation_errors,
59    }
60}
61
62pub fn restore_patch_with_session(patch: &str, session: &RedactionSession) -> RestoreResult {
63    restore_text_with_session(patch, session)
64}
65
66pub fn ensure_restore_valid(result: &RestoreResult) -> Result<()> {
67    if result.is_valid() {
68        return Ok(());
69    }
70
71    let mut messages = Vec::new();
72    if !result.validation_errors.is_empty() {
73        messages.extend(result.validation_errors.clone());
74    }
75    if !result.unresolved_tokens.is_empty() {
76        messages.push(format!(
77            "unresolved tokens: {}",
78            result.unresolved_tokens.join(", ")
79        ));
80    }
81    Err(anyhow!(messages.join("; ")))
82}
83
/// Finds every span of `text` that looks like a redaction token.
///
/// A span starts at the literal prefix `__R_` and continues over ASCII
/// uppercase letters, ASCII digits and underscores. It ends right after the
/// first `__` terminator that follows the prefix, so a token stays separate
/// from whatever comes directly after it (another token, uppercase text,
/// digits or further underscores). If a different byte or the end of the text
/// is reached before a terminator, the span ends there and is returned
/// unterminated so the caller can report it.
///
/// Returned ranges are byte offsets into `text`, in ascending order and
/// non-overlapping. Both ends fall on ASCII bytes, so they are always valid
/// `str` slice boundaries.
fn token_like_ranges(text: &str) -> Vec<std::ops::Range<usize>> {
    const PREFIX: &[u8] = b"__R_";
    const TERMINATOR: &[u8] = b"__";

    let bytes = text.as_bytes();
    let mut ranges = Vec::new();
    let mut index = 0;

    while index + PREFIX.len() <= bytes.len() {
        if !bytes[index..].starts_with(PREFIX) {
            index += 1;
            continue;
        }

        let mut end = index + PREFIX.len();
        while end < bytes.len() {
            // Check for the terminator before treating `_` as a body byte;
            // otherwise the closing `__` and anything token-like after it
            // would be absorbed into this span.
            if bytes[end..].starts_with(TERMINATOR) {
                end += TERMINATOR.len();
                break;
            }

            let byte = bytes[end];
            if byte.is_ascii_uppercase() || byte.is_ascii_digit() || byte == b'_' {
                end += 1;
            } else {
                break;
            }
        }

        ranges.push(index..end);
        // `end` is at least `index + PREFIX.len()`, so the scan always advances.
        index = end;
    }

    ranges
}
115
#[cfg(test)]
mod tests {
    use super::restore_text_with_session;
    use crate::{FindingKind, RedactionPolicy, Redactor, RedactorBuilder};

    /// Builds the redactor shared by these tests: the default policy with
    /// `true` passed for the `Domain`, `Secret` and `Url` finding kinds.
    fn domain_redactor() -> Redactor {
        let policy = RedactionPolicy::default()
            .with_kind(FindingKind::Domain, true)
            .with_kind(FindingKind::Secret, true)
            .with_kind(FindingKind::Url, true);

        RedactorBuilder::new().with_redaction_policy(policy).build()
    }

    /// Redacting and then restoring text with a repeated domain must give the
    /// input back, counting one restoration per occurrence.
    #[test]
    fn restore_streams_multiple_tokens_and_repetitions() {
        let text = "host=service.example.com alt=service.example.com";
        let session = domain_redactor()
            .redact_with_session(text)
            .expect("session");

        let restored = restore_text_with_session(&session.redacted_text, &session);

        assert!(restored.is_valid());
        assert_eq!(restored.restored_text, text);
        assert_eq!(restored.restored_count, 2);
    }

    /// A token the session never issued must be reported as unknown and must
    /// remain listed as unresolved after the restore.
    #[test]
    fn restore_preserves_unknown_token_validation() {
        let session = domain_redactor()
            .redact_with_session("host=service.example.com")
            .expect("session");

        let restored = restore_text_with_session("__R_DOMAIN_001__ __R_DOMAIN_999__", &session);

        let reported_unknown = restored
            .validation_errors
            .iter()
            .any(|message| message.contains("unknown or malformed token `__R_DOMAIN_999__`"));
        assert!(reported_unknown);
        assert_eq!(restored.unresolved_tokens, vec!["__R_DOMAIN_999__"]);
    }
}
157                .any(|message| message.contains("unknown or malformed token `__R_DOMAIN_999__`"))
158        );
159        assert_eq!(restored.unresolved_tokens, vec!["__R_DOMAIN_999__"]);
160    }
161}