1use crate::error::Result;
18
/// Tunable rules that decide which parts of a JSON document are pruned
/// during projection.
#[derive(Debug, Clone)]
pub struct ProjectionConfig {
    /// Field-name prefixes to strip, compared case-insensitively.
    ///
    /// A field whose name is exactly equal to a prefix is kept; only longer
    /// names that start with the prefix are removed.
    pub strip_prefixes: Vec<String>,
    /// Exact field names to strip, compared case-insensitively.
    pub strip_names: Vec<String>,
    /// Nesting depth (objects and arrays both count as a level) at which an
    /// object is replaced by a single `_sqz_summary` placeholder entry.
    pub max_depth: usize,
    /// When `true`, fields holding an empty array, empty object, or empty
    /// string are removed.
    pub strip_empty: bool,
    /// When `true`, timestamp-like fields of one object that share the same
    /// date are collapsed.
    pub dedup_timestamps: bool,
}
34
35impl Default for ProjectionConfig {
36 fn default() -> Self {
37 Self {
38 strip_prefixes: vec![
39 "_".to_string(),
40 "debug".to_string(),
41 "internal".to_string(),
42 "trace".to_string(),
43 "x_".to_string(),
44 ],
45 strip_names: vec![
46 "__v".to_string(),
47 "__typename".to_string(),
48 "etag".to_string(),
49 "_links".to_string(),
50 "_embedded".to_string(),
51 "cursor".to_string(),
52 "request_id".to_string(),
53 "x_request_id".to_string(),
54 "correlation_id".to_string(),
55 ],
56 max_depth: 5,
57 strip_empty: true,
58 dedup_timestamps: true,
59 }
60 }
61}
62
/// Outcome of a projection run.
#[derive(Debug, Clone)]
pub struct ProjectionResult {
    /// The resulting text: either the projected JSON or the untouched input.
    pub data: String,
    /// How many fields the projection removed (0 when the input is returned
    /// unchanged).
    pub fields_removed: usize,
    /// Estimated number of tokens saved relative to the input.
    pub tokens_saved: u32,
}
73
74pub fn project_json(input: &str, config: &ProjectionConfig) -> Result<ProjectionResult> {
79 let trimmed = input.trim();
80 let mut value: serde_json::Value = match serde_json::from_str(trimmed) {
81 Ok(v) => v,
82 Err(_) => {
83 return Ok(ProjectionResult {
84 data: input.to_string(),
85 fields_removed: 0,
86 tokens_saved: 0,
87 });
88 }
89 };
90
91 let original_tokens = estimate_tokens(input);
92 let mut fields_removed = 0;
93
94 project_value(&mut value, config, 0, &mut fields_removed);
95
96 let projected = serde_json::to_string(&value)
97 .unwrap_or_else(|_| input.to_string());
98
99 let projected_tokens = estimate_tokens(&projected);
100 let tokens_saved = original_tokens.saturating_sub(projected_tokens);
101
102 if projected.len() < input.len() || fields_removed > 0 {
104 Ok(ProjectionResult {
105 data: projected,
106 fields_removed,
107 tokens_saved,
108 })
109 } else {
110 Ok(ProjectionResult {
111 data: input.to_string(),
112 fields_removed: 0,
113 tokens_saved: 0,
114 })
115 }
116}
117
118fn project_value(
120 value: &mut serde_json::Value,
121 config: &ProjectionConfig,
122 depth: usize,
123 removed: &mut usize,
124) {
125 match value {
126 serde_json::Value::Object(map) => {
127 if depth >= config.max_depth {
129 let key_count = map.len();
130 if key_count > 0 {
131 map.clear();
132 map.insert(
133 "_sqz_summary".to_string(),
134 serde_json::Value::String(format!("{{...{key_count} keys}}")),
135 );
136 *removed += key_count;
137 }
138 return;
139 }
140
141 let keys_to_remove: Vec<String> = map
142 .keys()
143 .filter(|k| should_strip_field(k, config))
144 .cloned()
145 .collect();
146
147 for key in &keys_to_remove {
148 map.remove(key);
149 *removed += 1;
150 }
151
152 if config.strip_empty {
154 let empty_keys: Vec<String> = map
155 .iter()
156 .filter(|(_, v)| is_empty_collection(v))
157 .map(|(k, _)| k.clone())
158 .collect();
159 for key in &empty_keys {
160 map.remove(key);
161 *removed += 1;
162 }
163 }
164
165 if config.dedup_timestamps {
168 dedup_timestamps(map, removed);
169 }
170
171 for v in map.values_mut() {
173 project_value(v, config, depth + 1, removed);
174 }
175 }
176 serde_json::Value::Array(arr) => {
177 for item in arr.iter_mut() {
178 project_value(item, config, depth + 1, removed);
179 }
180 }
181 _ => {}
182 }
183}
184
185fn should_strip_field(name: &str, config: &ProjectionConfig) -> bool {
187 let lower = name.to_lowercase();
188
189 for strip_name in &config.strip_names {
191 if lower == strip_name.to_lowercase() {
192 return true;
193 }
194 }
195
196 for prefix in &config.strip_prefixes {
198 let prefix_lower = prefix.to_lowercase();
199 if lower.starts_with(&prefix_lower) && lower != prefix_lower {
200 return true;
203 }
204 }
205
206 false
207}
208
209fn is_empty_collection(value: &serde_json::Value) -> bool {
211 match value {
212 serde_json::Value::Array(arr) => arr.is_empty(),
213 serde_json::Value::Object(map) => map.is_empty(),
214 serde_json::Value::String(s) => s.is_empty(),
215 _ => false,
216 }
217}
218
219fn dedup_timestamps(
222 map: &mut serde_json::Map<String, serde_json::Value>,
223 removed: &mut usize,
224) {
225 let timestamp_fields: Vec<(String, String)> = map
226 .iter()
227 .filter_map(|(k, v)| {
228 if (k.ends_with("_at") || k.ends_with("_date") || k.ends_with("_time"))
229 && v.is_string()
230 {
231 let date_prefix = v.as_str().unwrap_or("").chars().take(10).collect::<String>();
232 if date_prefix.len() == 10 && date_prefix.contains('-') {
233 return Some((k.clone(), date_prefix));
234 }
235 }
236 None
237 })
238 .collect();
239
240 if timestamp_fields.len() <= 1 {
241 return;
242 }
243
244 let mut seen_dates: std::collections::HashSet<String> = std::collections::HashSet::new();
246 let mut first_field_per_date: std::collections::HashMap<String, String> = std::collections::HashMap::new();
247 let mut to_remove = Vec::new();
248
249 for (field, date) in ×tamp_fields {
250 if seen_dates.contains(date) {
251 let primary = first_field_per_date.get(date).unwrap();
253 let dominated = if field.contains("created") {
255 to_remove.push(primary.clone());
257 false
258 } else {
259 true
260 };
261 if dominated {
262 to_remove.push(field.clone());
263 }
264 } else {
265 seen_dates.insert(date.clone());
266 first_field_per_date.insert(date.clone(), field.clone());
267 }
268 }
269
270 for field in &to_remove {
271 map.remove(field);
272 *removed += 1;
273 }
274}
275
/// Rough token estimate: one token per four bytes of text, rounded up and
/// capped at `u32::MAX`.
fn estimate_tokens(text: &str) -> u32 {
    let byte_len = text.len();
    // Integer ceiling division; written this way so it cannot overflow.
    let quads = byte_len / 4 + usize::from(byte_len % 4 != 0);
    quads.min(u32::MAX as usize) as u32
}
279
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;
    use serde_json::json;

    /// Serializes `input`, runs it through `project_json`, and returns the
    /// result together with the re-parsed output document.
    fn run(input: &serde_json::Value, config: &ProjectionConfig) -> (ProjectionResult, serde_json::Value) {
        let text = serde_json::to_string(input).unwrap();
        let result = project_json(&text, config).unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&result.data).unwrap();
        (result, parsed)
    }

    #[test]
    fn test_strips_internal_fields() {
        let input = json!({
            "id": 1,
            "name": "Alice",
            "_id": "abc123",
            "__v": 3,
            "debug_info": "verbose stuff",
            "internal_state": "hidden"
        });
        let (result, parsed) = run(&input, &ProjectionConfig::default());
        assert!(parsed.get("id").is_some(), "id should be kept");
        assert!(parsed.get("name").is_some(), "name should be kept");
        assert!(parsed.get("_id").is_none(), "_id should be stripped");
        assert!(parsed.get("__v").is_none(), "__v should be stripped");
        assert!(parsed.get("debug_info").is_none(), "debug_info should be stripped");
        assert!(
            parsed.get("internal_state").is_none(),
            "internal_state should be stripped"
        );
        assert_eq!(result.fields_removed, 4);
    }

    #[test]
    fn test_strips_empty_collections() {
        let input = json!({
            "name": "Bob",
            "tags": [],
            "metadata": {},
            "bio": ""
        });
        let (result, parsed) = run(&input, &ProjectionConfig::default());
        assert!(parsed.get("name").is_some());
        assert!(parsed.get("tags").is_none(), "empty array should be stripped");
        assert!(parsed.get("metadata").is_none(), "empty object should be stripped");
        assert!(parsed.get("bio").is_none(), "empty string should be stripped");
        assert_eq!(result.fields_removed, 3);
    }

    #[test]
    fn test_max_depth_truncation() {
        let input = json!({
            "a": {"b": {"c": {"d": {"e": {"f": "deep"}}}}}
        });
        let config = ProjectionConfig {
            max_depth: 3,
            ..Default::default()
        };
        let (result, parsed) = run(&input, &config);
        // The root is depth 0, so the object under "c" sits at depth 3 and
        // must have been replaced by the summary placeholder.
        let at_depth = &parsed["a"]["b"]["c"];
        assert_eq!(
            at_depth["_sqz_summary"], "{...1 keys}",
            "deep nesting should be truncated at max_depth: {:?}",
            parsed
        );
        assert!(
            at_depth.get("d").is_none(),
            "content below max_depth should be gone: {:?}",
            parsed
        );
        assert_eq!(result.fields_removed, 1);
    }

    #[test]
    fn test_dedup_timestamps() {
        let input = json!({
            "name": "Alice",
            "created_at": "2024-01-15T10:00:00Z",
            "updated_at": "2024-01-15T14:30:00Z",
            "modified_at": "2024-01-15T14:30:00Z"
        });
        let (result, parsed) = run(&input, &ProjectionConfig::default());
        assert!(parsed.get("name").is_some(), "non-timestamp fields are untouched");
        assert!(parsed.get("created_at").is_some(), "created_at should be kept");
        assert!(
            parsed.get("updated_at").is_none(),
            "updated_at shares created_at's date and should be removed"
        );
        assert!(
            parsed.get("modified_at").is_none(),
            "modified_at shares created_at's date and should be removed"
        );
        assert_eq!(result.fields_removed, 2, "redundant timestamps should be removed");
    }

    #[test]
    fn test_non_json_passthrough() {
        let input = "not json at all";
        let result = project_json(input, &ProjectionConfig::default()).unwrap();
        assert_eq!(result.data, input);
        assert_eq!(result.fields_removed, 0);
        assert_eq!(result.tokens_saved, 0);
    }

    #[test]
    fn test_preserves_important_fields() {
        let input = json!({
            "id": 42,
            "name": "Alice",
            "email": "alice@example.com",
            "role": "admin",
            "status": "active"
        });
        let (result, parsed) = run(&input, &ProjectionConfig::default());
        assert_eq!(parsed["id"], 42);
        assert_eq!(parsed["name"], "Alice");
        assert_eq!(parsed["email"], "alice@example.com");
        assert_eq!(parsed["role"], "admin");
        assert_eq!(parsed["status"], "active");
        assert_eq!(result.fields_removed, 0);
    }

    #[test]
    fn test_custom_strip_prefixes() {
        let input = json!({
            "name": "Alice",
            "tmp_cache": "data",
            "tmp_buffer": "more data"
        });
        let config = ProjectionConfig {
            strip_prefixes: vec!["tmp_".to_string()],
            ..Default::default()
        };
        let (result, parsed) = run(&input, &config);
        assert!(parsed.get("name").is_some());
        assert!(parsed.get("tmp_cache").is_none());
        assert!(parsed.get("tmp_buffer").is_none());
        assert_eq!(result.fields_removed, 2);
    }

    #[test]
    fn test_nested_projection() {
        let input = json!({
            "user": {
                "id": 1,
                "name": "Alice",
                "_internal_id": "xyz",
                "debug_flags": [1, 2, 3]
            }
        });
        let (result, parsed) = run(&input, &ProjectionConfig::default());
        assert!(parsed["user"].get("id").is_some());
        assert!(parsed["user"].get("name").is_some());
        assert!(parsed["user"].get("_internal_id").is_none());
        assert!(parsed["user"].get("debug_flags").is_none());
        assert_eq!(result.fields_removed, 2);
    }

    proptest! {
        #[test]
        fn prop_projection_produces_valid_json(
            key1 in "[a-z]{3,10}",
            key2 in "[a-z]{3,10}",
            val in "[a-z0-9 ]{1,50}",
        ) {
            let input = format!(r#"{{"{key1}":"{val}","{key2}":42}}"#);
            let config = ProjectionConfig::default();
            let result = project_json(&input, &config).unwrap();
            let parsed: std::result::Result<serde_json::Value, _> = serde_json::from_str(&result.data);
            prop_assert!(parsed.is_ok(), "projection output must be valid JSON");
        }

        #[test]
        fn prop_fields_removed_non_negative(
            val in "[a-z]{1,20}",
        ) {
            let input = format!(r#"{{"name":"{val}","_debug":"x","__v":1}}"#);
            let config = ProjectionConfig::default();
            let result = project_json(&input, &config).unwrap();
            // A `usize` can never be negative, so pin the exact outcome
            // instead: `_debug` and `__v` are always stripped, `name` is kept.
            prop_assert_eq!(result.fields_removed, 2);
            let parsed: serde_json::Value = serde_json::from_str(&result.data).unwrap();
            prop_assert_eq!(&parsed["name"], &serde_json::Value::String(val.clone()));
            prop_assert!(parsed.get("_debug").is_none());
            prop_assert!(parsed.get("__v").is_none());
        }
    }
}