cloudiful_redactor/session/
restore.rs1use std::collections::{BTreeSet, HashMap};
2
3use anyhow::{Result, anyhow};
4
5use crate::types::{RedactionSession, RestoreResult};
6
7pub fn restore_text_with_session(input: &str, session: &RedactionSession) -> RestoreResult {
8 let known_tokens = session
9 .entries
10 .iter()
11 .map(|entry| entry.token.clone())
12 .collect::<BTreeSet<_>>();
13 let token_map = session
14 .entries
15 .iter()
16 .map(|entry| (entry.token.as_str(), entry.original.as_str()))
17 .collect::<HashMap<_, _>>();
18
19 let mut restored_text = String::with_capacity(input.len());
20 let mut restored_count = 0;
21 let mut validation_errors = Vec::new();
22 let mut cursor = 0;
23
24 for token_range in token_like_ranges(input) {
25 restored_text.push_str(&input[cursor..token_range.start]);
26 let candidate = &input[token_range.clone()];
27 if !known_tokens.contains(candidate) {
28 validation_errors.push(format!("unknown or malformed token `{candidate}`"));
29 restored_text.push_str(candidate);
30 } else if let Some(original) = token_map.get(candidate) {
31 restored_text.push_str(original);
32 restored_count += 1;
33 } else {
34 restored_text.push_str(candidate);
35 }
36 cursor = token_range.end;
37 }
38 restored_text.push_str(&input[cursor..]);
39
40 let unresolved_tokens = token_like_ranges(&restored_text)
41 .into_iter()
42 .map(|range| restored_text[range].to_string())
43 .filter(|candidate| candidate.starts_with("__R_"))
44 .collect::<Vec<_>>();
45
46 if !unresolved_tokens.is_empty() {
47 validation_errors.extend(
48 unresolved_tokens
49 .iter()
50 .map(|candidate| format!("unresolved token remained after restore: `{candidate}`")),
51 );
52 }
53
54 RestoreResult {
55 restored_text,
56 restored_count,
57 unresolved_tokens,
58 validation_errors,
59 }
60}
61
62pub fn restore_patch_with_session(patch: &str, session: &RedactionSession) -> RestoreResult {
63 restore_text_with_session(patch, session)
64}
65
66pub fn ensure_restore_valid(result: &RestoreResult) -> Result<()> {
67 if result.is_valid() {
68 return Ok(());
69 }
70
71 let mut messages = Vec::new();
72 if !result.validation_errors.is_empty() {
73 messages.extend(result.validation_errors.clone());
74 }
75 if !result.unresolved_tokens.is_empty() {
76 messages.push(format!(
77 "unresolved tokens: {}",
78 result.unresolved_tokens.join(", ")
79 ));
80 }
81 Err(anyhow!(messages.join("; ")))
82}
83
/// Finds the byte ranges of all token-like candidates in `text`.
///
/// A candidate starts with the prefix `__R_`, continues over ASCII uppercase
/// letters, ASCII digits and single underscores, and ends with the first `__`
/// terminator found after the prefix (the terminator is part of the range).
/// If the body stops before a terminator is seen, the unterminated span is
/// still returned so callers can report it as malformed.
///
/// Stopping at the first terminator keeps adjacent tokens
/// (`__R_A_001____R_B_002__`) and tokens followed by identifier characters
/// (`__R_A_001__FOO`) from being merged into one oversized candidate.
///
/// Ranges are returned in ascending order, never overlap, and always fall on
/// `char` boundaries because every matched byte is ASCII.
fn token_like_ranges(text: &str) -> Vec<std::ops::Range<usize>> {
    const PREFIX: &[u8] = b"__R_";
    const TERMINATOR: &[u8] = b"__";

    let bytes = text.as_bytes();
    let mut ranges = Vec::new();
    let mut index = 0;

    while index + PREFIX.len() <= bytes.len() {
        if !bytes[index..].starts_with(PREFIX) {
            index += 1;
            continue;
        }

        let mut end = index + PREFIX.len();
        loop {
            // Check for the terminator before treating `_` as a body byte;
            // otherwise the body would swallow the closing `__`.
            if bytes[end..].starts_with(TERMINATOR) {
                end += TERMINATOR.len();
                break;
            }
            match bytes.get(end) {
                Some(byte)
                    if byte.is_ascii_uppercase() || byte.is_ascii_digit() || *byte == b'_' =>
                {
                    end += 1;
                }
                // End of input or a non-token byte: candidate is unterminated.
                _ => break,
            }
        }

        ranges.push(index..end);
        // `end` is at least `index + PREFIX.len()`, so the scan always advances.
        index = end;
    }

    ranges
}
115
#[cfg(test)]
mod tests {
    use super::restore_text_with_session;
    use crate::{FindingKind, RedactionPolicy, Redactor, RedactorBuilder};

    /// Builds a redactor with the domain, secret and URL finding kinds enabled.
    fn domain_redactor() -> Redactor {
        let policy = RedactionPolicy::default()
            .with_kind(FindingKind::Domain, true)
            .with_kind(FindingKind::Secret, true)
            .with_kind(FindingKind::Url, true);

        RedactorBuilder::new().with_redaction_policy(policy).build()
    }

    /// Round-trips text in which the same value occurs twice.
    #[test]
    fn restore_streams_multiple_tokens_and_repetitions() {
        let original_text = "host=service.example.com alt=service.example.com";
        let session = domain_redactor()
            .redact_with_session(original_text)
            .expect("session");

        let outcome = restore_text_with_session(&session.redacted_text, &session);

        assert!(outcome.is_valid());
        assert_eq!(outcome.restored_text, original_text);
        assert_eq!(outcome.restored_count, 2);
    }

    /// A token absent from the session is reported and left unresolved.
    #[test]
    fn restore_preserves_unknown_token_validation() {
        let session = domain_redactor()
            .redact_with_session("host=service.example.com")
            .expect("session");

        let outcome = restore_text_with_session("__R_DOMAIN_001__ __R_DOMAIN_999__", &session);

        let reports_unknown_token = outcome
            .validation_errors
            .iter()
            .any(|message| message.contains("unknown or malformed token `__R_DOMAIN_999__`"));
        assert!(reports_unknown_token);
        assert_eq!(outcome.unresolved_tokens, vec!["__R_DOMAIN_999__"]);
    }
}
161}