//! `sqz_engine/stages.rs`: the `CompressionStage` trait and the built-in pipeline stages.

1use crate::error::{Result, SqzError};
2use crate::toon::ToonEncoder;
3use crate::types::{Content, ContentType, StageConfig};
4
/// A single compression stage in the pipeline.
///
/// Each stage transforms `Content` in place according to its `StageConfig`.
/// Stages must check `config.enabled` and return early (no-op) when disabled.
///
/// The `Send + Sync` supertraits mean every implementor can be shared across
/// threads.
pub trait CompressionStage: Send + Sync {
    /// Short identifier for the stage. The built-in stages in this file return
    /// snake_case names such as `"keep_fields"`.
    fn name(&self) -> &str;

    /// Numeric priority of the stage. The built-in stages in this file use
    /// values between 10 and 80.
    // NOTE(review): presumably the pipeline runs stages in ascending priority
    // order; the ordering logic is not in this file, so confirm it there.
    fn priority(&self) -> u32;

    /// Applies the stage to `content`, mutating it in place.
    ///
    /// Returns `Ok(())` on success, which includes the case where the stage
    /// decides it has nothing to do, and an error when the stage cannot
    /// process its options or its input.
    fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()>;
}
14
15// ---------------------------------------------------------------------------
16// Helper: parse raw as JSON, apply a transform, serialize back
17// ---------------------------------------------------------------------------
18
19fn with_json<F>(content: &mut Content, f: F) -> Result<()>
20where
21    F: FnOnce(&mut serde_json::Value) -> Result<()>,
22{
23    if !ToonEncoder::is_json(&content.raw) {
24        return Ok(());
25    }
26    let mut value: serde_json::Value = serde_json::from_str(&content.raw)?;
27    f(&mut value)?;
28    content.raw = serde_json::to_string(&value)?;
29    Ok(())
30}
31
32// ---------------------------------------------------------------------------
33// Stage 1: keep_fields
34// ---------------------------------------------------------------------------
35
36/// For JSON content, keep only the specified top-level fields; drop all others.
37/// Config options: `fields` — array of field name strings.
38/// Non-JSON content passes through unchanged.
39pub struct KeepFieldsStage;
40
41impl CompressionStage for KeepFieldsStage {
42    fn name(&self) -> &str {
43        "keep_fields"
44    }
45
46    fn priority(&self) -> u32 {
47        10
48    }
49
50    fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
51        if !config.enabled {
52            return Ok(());
53        }
54        let fields: Vec<String> = match config.options.get("fields") {
55            Some(v) => serde_json::from_value(v.clone())
56                .map_err(|e| SqzError::Other(format!("keep_fields: invalid fields option: {e}")))?,
57            None => return Ok(()),
58        };
59        if fields.is_empty() {
60            return Ok(());
61        }
62        with_json(content, |value| {
63            if let serde_json::Value::Object(map) = value {
64                map.retain(|k, _| fields.contains(k));
65            }
66            Ok(())
67        })
68    }
69}
70
71// ---------------------------------------------------------------------------
72// Stage 2: strip_fields
73// ---------------------------------------------------------------------------
74
/// For JSON content, remove specified fields by key name.
/// Supports dot-notation for nested fields (e.g. "metadata.internal_id").
/// Each path is split on `.` and followed through nested objects only; a path
/// that does not resolve (missing key, or a non-object such as an array on
/// the way) is silently ignored.
/// Config options: `fields` — array of field path strings. A missing or empty
/// list makes the stage a no-op; a value that is not an array of strings is
/// reported as an error.
/// Non-JSON content passes through unchanged.
pub struct StripFieldsStage;
80
81fn strip_field_path(value: &mut serde_json::Value, path: &[&str]) {
82    if path.is_empty() {
83        return;
84    }
85    if let serde_json::Value::Object(map) = value {
86        if path.len() == 1 {
87            map.remove(path[0]);
88        } else {
89            if let Some(child) = map.get_mut(path[0]) {
90                strip_field_path(child, &path[1..]);
91            }
92        }
93    }
94}
95
96impl CompressionStage for StripFieldsStage {
97    fn name(&self) -> &str {
98        "strip_fields"
99    }
100
101    fn priority(&self) -> u32 {
102        20
103    }
104
105    fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
106        if !config.enabled {
107            return Ok(());
108        }
109        let fields: Vec<String> = match config.options.get("fields") {
110            Some(v) => serde_json::from_value(v.clone())
111                .map_err(|e| SqzError::Other(format!("strip_fields: invalid fields option: {e}")))?,
112            None => return Ok(()),
113        };
114        if fields.is_empty() {
115            return Ok(());
116        }
117        with_json(content, |value| {
118            for field in &fields {
119                let parts: Vec<&str> = field.split('.').collect();
120                strip_field_path(value, &parts);
121            }
122            Ok(())
123        })
124    }
125}
126
127// ---------------------------------------------------------------------------
128// Stage 3: condense
129// ---------------------------------------------------------------------------
130
131/// For plain text / CLI output, collapse runs of repeated identical lines
132/// down to at most `max_repeated_lines`.
133/// Config options: `max_repeated_lines` (u32, default 3).
134/// Non-plain-text content passes through unchanged.
135pub struct CondenseStage;
136
137impl CompressionStage for CondenseStage {
138    fn name(&self) -> &str {
139        "condense"
140    }
141
142    fn priority(&self) -> u32 {
143        30
144    }
145
146    fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
147        if !config.enabled {
148            return Ok(());
149        }
150        // Only apply to plain text and CLI output
151        match &content.content_type {
152            ContentType::PlainText | ContentType::CliOutput { .. } => {}
153            _ => return Ok(()),
154        }
155
156        let max_repeated: u32 = config
157            .options
158            .get("max_repeated_lines")
159            .and_then(|v| v.as_u64())
160            .map(|v| v as u32)
161            .unwrap_or(3);
162
163        let mut result = Vec::new();
164        let mut current_line: Option<&str> = None;
165        let mut run_count: u32 = 0;
166
167        for line in content.raw.lines() {
168            match current_line {
169                Some(prev) if prev == line => {
170                    run_count += 1;
171                    if run_count <= max_repeated {
172                        result.push(line);
173                    }
174                }
175                _ => {
176                    current_line = Some(line);
177                    run_count = 1;
178                    result.push(line);
179                }
180            }
181        }
182
183        // Preserve trailing newline if original had one
184        let trailing_newline = content.raw.ends_with('\n');
185        content.raw = result.join("\n");
186        if trailing_newline {
187            content.raw.push('\n');
188        }
189        Ok(())
190    }
191}
192
193// ---------------------------------------------------------------------------
194// Stage 4: strip_nulls
195// ---------------------------------------------------------------------------
196
/// For JSON content, recursively remove all null-valued fields from objects.
/// Arrays keep their null elements, but objects nested inside arrays are
/// still cleaned.
/// Config options: none are read; only the `enabled` flag of the stage config
/// is consulted.
/// Non-JSON content passes through unchanged.
pub struct StripNullsStage;
201
202fn strip_nulls_recursive(value: &mut serde_json::Value) {
203    match value {
204        serde_json::Value::Object(map) => {
205            map.retain(|_, v| !v.is_null());
206            for v in map.values_mut() {
207                strip_nulls_recursive(v);
208            }
209        }
210        serde_json::Value::Array(arr) => {
211            for item in arr.iter_mut() {
212                strip_nulls_recursive(item);
213            }
214        }
215        _ => {}
216    }
217}
218
219impl CompressionStage for StripNullsStage {
220    fn name(&self) -> &str {
221        "strip_nulls"
222    }
223
224    fn priority(&self) -> u32 {
225        40
226    }
227
228    fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
229        if !config.enabled {
230            return Ok(());
231        }
232        with_json(content, |value| {
233            strip_nulls_recursive(value);
234            Ok(())
235        })
236    }
237}
238
239// ---------------------------------------------------------------------------
240// Stage 5: flatten
241// ---------------------------------------------------------------------------
242
/// For JSON content, flatten nested objects up to `max_depth` levels using
/// dot-notation for flattened keys (e.g. `{"a":{"b":1}}` → `{"a.b":1}`).
/// Top-level members count as depth 1, so `max_depth` = 1 leaves the nesting
/// untouched. Only objects are descended into; arrays are copied as-is.
/// Config options: `max_depth` (u32, default 3).
/// Non-JSON content passes through unchanged, and JSON whose top level is not
/// an object is not flattened.
pub struct FlattenStage;
248
249fn flatten_value(
250    value: &serde_json::Value,
251    prefix: &str,
252    depth: u32,
253    max_depth: u32,
254    out: &mut serde_json::Map<String, serde_json::Value>,
255) {
256    if let serde_json::Value::Object(map) = value {
257        if depth < max_depth {
258            for (k, v) in map {
259                let new_key = if prefix.is_empty() {
260                    k.clone()
261                } else {
262                    format!("{prefix}.{k}")
263                };
264                flatten_value(v, &new_key, depth + 1, max_depth, out);
265            }
266            return;
267        }
268    }
269    out.insert(prefix.to_owned(), value.clone());
270}
271
272impl CompressionStage for FlattenStage {
273    fn name(&self) -> &str {
274        "flatten"
275    }
276
277    fn priority(&self) -> u32 {
278        50
279    }
280
281    fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
282        if !config.enabled {
283            return Ok(());
284        }
285        let max_depth: u32 = config
286            .options
287            .get("max_depth")
288            .and_then(|v| v.as_u64())
289            .map(|v| v as u32)
290            .unwrap_or(3);
291
292        with_json(content, |value| {
293            if let serde_json::Value::Object(map) = value {
294                let mut out = serde_json::Map::new();
295                for (k, v) in map.iter() {
296                    flatten_value(v, k, 1, max_depth, &mut out);
297                }
298                *map = out;
299            }
300            Ok(())
301        })
302    }
303}
304
305// ---------------------------------------------------------------------------
306// Stage 6: truncate_strings
307// ---------------------------------------------------------------------------
308
/// For JSON content, truncate string values longer than `max_length` chars,
/// appending "..." to indicate truncation.
/// Length is counted in `char`s, and the marker is added on top of the kept
/// prefix, so a truncated value is `max_length + 3` chars long. String values
/// nested in objects and arrays are processed too; object keys are not.
/// Config options: `max_length` (u32, default 500).
/// Non-JSON content passes through unchanged.
pub struct TruncateStringsStage;
314
315fn truncate_strings_recursive(value: &mut serde_json::Value, max_length: usize) {
316    match value {
317        serde_json::Value::String(s) => {
318            if s.chars().count() > max_length {
319                let truncated: String = s.chars().take(max_length).collect();
320                *s = format!("{truncated}...");
321            }
322        }
323        serde_json::Value::Object(map) => {
324            for v in map.values_mut() {
325                truncate_strings_recursive(v, max_length);
326            }
327        }
328        serde_json::Value::Array(arr) => {
329            for item in arr.iter_mut() {
330                truncate_strings_recursive(item, max_length);
331            }
332        }
333        _ => {}
334    }
335}
336
337impl CompressionStage for TruncateStringsStage {
338    fn name(&self) -> &str {
339        "truncate_strings"
340    }
341
342    fn priority(&self) -> u32 {
343        60
344    }
345
346    fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
347        if !config.enabled {
348            return Ok(());
349        }
350        let max_length: usize = config
351            .options
352            .get("max_length")
353            .and_then(|v| v.as_u64())
354            .map(|v| v as usize)
355            .unwrap_or(500);
356
357        with_json(content, |value| {
358            truncate_strings_recursive(value, max_length);
359            Ok(())
360        })
361    }
362}
363
364// ---------------------------------------------------------------------------
365// Stage 7: collapse_arrays
366// ---------------------------------------------------------------------------
367
/// For JSON content, if an array has more than `max_items` elements, keep the
/// first `max_items` and replace the rest with a summary string element.
/// The rule is applied to every array in the document, including arrays
/// nested inside objects or other arrays, so a collapsed array ends up with
/// `max_items + 1` elements.
/// Config options:
///   - `max_items` (u32, default 5)
///   - `summary_template` (string, default "... and {remaining} more items");
///     `{remaining}` is replaced by the number of dropped elements.
/// Non-JSON content passes through unchanged.
pub struct CollapseArraysStage;
375
376fn collapse_arrays_recursive(
377    value: &mut serde_json::Value,
378    max_items: usize,
379    summary_template: &str,
380) {
381    match value {
382        serde_json::Value::Array(arr) => {
383            // First recurse into existing items
384            for item in arr.iter_mut() {
385                collapse_arrays_recursive(item, max_items, summary_template);
386            }
387            // Then collapse if needed
388            if arr.len() > max_items {
389                let remaining = arr.len() - max_items;
390                arr.truncate(max_items);
391                let summary = summary_template.replace("{remaining}", &remaining.to_string());
392                arr.push(serde_json::Value::String(summary));
393            }
394        }
395        serde_json::Value::Object(map) => {
396            for v in map.values_mut() {
397                collapse_arrays_recursive(v, max_items, summary_template);
398            }
399        }
400        _ => {}
401    }
402}
403
404impl CompressionStage for CollapseArraysStage {
405    fn name(&self) -> &str {
406        "collapse_arrays"
407    }
408
409    fn priority(&self) -> u32 {
410        70
411    }
412
413    fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
414        if !config.enabled {
415            return Ok(());
416        }
417        let max_items: usize = config
418            .options
419            .get("max_items")
420            .and_then(|v| v.as_u64())
421            .map(|v| v as usize)
422            .unwrap_or(5);
423        let summary_template = config
424            .options
425            .get("summary_template")
426            .and_then(|v| v.as_str())
427            .unwrap_or("... and {remaining} more items")
428            .to_owned();
429
430        with_json(content, |value| {
431            collapse_arrays_recursive(value, max_items, &summary_template);
432            Ok(())
433        })
434    }
435}
436
437// ---------------------------------------------------------------------------
438// Stage 7b: git_diff_fold
439// ---------------------------------------------------------------------------
440
441/// For git diff output, fold consecutive unchanged context lines (lines
442/// starting with a space) into a compact `[N unchanged lines]` marker.
443/// This preserves all changed lines (+/-) and hunk headers (@@) while
444/// dramatically reducing noise from context lines.
445///
446/// Config options:
447///   - `max_context_lines` (u32, default 2) — keep this many context lines
448///     before/after each changed block before folding the rest.
449pub struct GitDiffFoldStage;
450
451impl CompressionStage for GitDiffFoldStage {
452    fn name(&self) -> &str {
453        "git_diff_fold"
454    }
455
456    fn priority(&self) -> u32 {
457        35
458    }
459
460    fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
461        if !config.enabled {
462            return Ok(());
463        }
464        // Only apply to plain text / CLI output that looks like a diff
465        match &content.content_type {
466            ContentType::PlainText | ContentType::CliOutput { .. } => {}
467            _ => return Ok(()),
468        }
469        // Quick check: must contain diff markers
470        if !content.raw.contains("\n+") && !content.raw.contains("\n-") {
471            return Ok(());
472        }
473
474        let max_ctx: usize = config
475            .options
476            .get("max_context_lines")
477            .and_then(|v| v.as_u64())
478            .map(|v| v as usize)
479            .unwrap_or(2);
480
481        let lines: Vec<&str> = content.raw.lines().collect();
482        let n = lines.len();
483
484        // Mark which lines are "changed" (added, removed, or hunk headers)
485        let is_changed: Vec<bool> = lines
486            .iter()
487            .map(|l| {
488                l.starts_with('+')
489                    || l.starts_with('-')
490                    || l.starts_with("@@")
491                    || l.starts_with("diff ")
492                    || l.starts_with("index ")
493                    || l.starts_with("--- ")
494                    || l.starts_with("+++ ")
495            })
496            .collect();
497
498        // For each context line, determine if it's within max_ctx of a changed line
499        let mut keep = vec![false; n];
500        for i in 0..n {
501            if is_changed[i] {
502                keep[i] = true;
503                // Keep max_ctx lines before
504                for j in i.saturating_sub(max_ctx)..i {
505                    keep[j] = true;
506                }
507                // Keep max_ctx lines after
508                for j in (i + 1)..n.min(i + 1 + max_ctx) {
509                    keep[j] = true;
510                }
511            }
512        }
513
514        // Build output, folding consecutive non-kept lines
515        let mut result = Vec::new();
516        let mut fold_count = 0usize;
517
518        for i in 0..n {
519            if keep[i] {
520                if fold_count > 0 {
521                    result.push(format!("[{fold_count} unchanged lines]"));
522                    fold_count = 0;
523                }
524                result.push(lines[i].to_owned());
525            } else {
526                fold_count += 1;
527            }
528        }
529        if fold_count > 0 {
530            result.push(format!("[{fold_count} unchanged lines]"));
531        }
532
533        let trailing_newline = content.raw.ends_with('\n');
534        content.raw = result.join("\n");
535        if trailing_newline {
536            content.raw.push('\n');
537        }
538        Ok(())
539    }
540}
541
542// ---------------------------------------------------------------------------
543// Stage 8: custom_transforms
544// ---------------------------------------------------------------------------
545
546/// No-op stage that serves as the insertion point for plugin stages.
547/// Passes content through unchanged.
548pub struct CustomTransformsStage;
549
550impl CompressionStage for CustomTransformsStage {
551    fn name(&self) -> &str {
552        "custom_transforms"
553    }
554
555    fn priority(&self) -> u32 {
556        80
557    }
558
559    fn process(&self, _content: &mut Content, config: &StageConfig) -> Result<()> {
560        if !config.enabled {
561            return Ok(());
562        }
563        // No-op: plugin stages are inserted here by the pipeline
564        Ok(())
565    }
566}
567
568// ---------------------------------------------------------------------------
569// Tests
570// ---------------------------------------------------------------------------
571
#[cfg(test)]
mod tests {
    // Unit tests for the built-in stages. Each stage gets at least one test of
    // its transformation and one test showing that a disabled config leaves
    // the content untouched.
    use super::*;
    use crate::types::{ContentMetadata, ContentType};
    use serde_json::json;

    /// Builds a `Content` tagged as JSON around `raw`, with empty metadata.
    fn json_content(raw: &str) -> Content {
        Content {
            raw: raw.to_owned(),
            content_type: ContentType::Json,
            metadata: ContentMetadata {
                source: None,
                path: None,
                language: None,
            },
            tokens_original: 0,
        }
    }

    /// Builds a `Content` tagged as plain text around `raw`, with empty metadata.
    fn text_content(raw: &str) -> Content {
        Content {
            raw: raw.to_owned(),
            content_type: ContentType::PlainText,
            metadata: ContentMetadata {
                source: None,
                path: None,
                language: None,
            },
            tokens_original: 0,
        }
    }

    /// An enabled stage config carrying the given options.
    fn enabled_config(options: serde_json::Value) -> StageConfig {
        StageConfig {
            enabled: true,
            options,
        }
    }

    /// A disabled stage config with no options.
    fn disabled_config() -> StageConfig {
        StageConfig {
            enabled: false,
            options: json!({}),
        }
    }

    // --- keep_fields ---

    #[test]
    fn keep_fields_retains_specified() {
        let mut c = json_content(r#"{"id":1,"name":"Alice","debug":"x"}"#);
        let cfg = enabled_config(json!({"fields": ["id", "name"]}));
        KeepFieldsStage.process(&mut c, &cfg).unwrap();
        // Compare parsed values so key order in the output does not matter.
        let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
        assert_eq!(v, json!({"id":1,"name":"Alice"}));
    }

    #[test]
    fn keep_fields_disabled_passthrough() {
        let raw = r#"{"id":1,"name":"Alice"}"#;
        let mut c = json_content(raw);
        KeepFieldsStage.process(&mut c, &disabled_config()).unwrap();
        assert_eq!(c.raw, raw);
    }

    #[test]
    fn keep_fields_non_json_passthrough() {
        let raw = "not json at all";
        let mut c = text_content(raw);
        let cfg = enabled_config(json!({"fields": ["id"]}));
        KeepFieldsStage.process(&mut c, &cfg).unwrap();
        assert_eq!(c.raw, raw);
    }

    // --- strip_fields ---

    #[test]
    fn strip_fields_removes_top_level() {
        let mut c = json_content(r#"{"id":1,"debug":"x","name":"Bob"}"#);
        let cfg = enabled_config(json!({"fields": ["debug"]}));
        StripFieldsStage.process(&mut c, &cfg).unwrap();
        let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
        assert_eq!(v, json!({"id":1,"name":"Bob"}));
    }

    #[test]
    fn strip_fields_dot_notation() {
        let mut c = json_content(r#"{"metadata":{"internal_id":"x","public":"y"},"name":"Bob"}"#);
        let cfg = enabled_config(json!({"fields": ["metadata.internal_id"]}));
        StripFieldsStage.process(&mut c, &cfg).unwrap();
        let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
        // Only the addressed nested key is removed; its siblings survive.
        assert_eq!(v, json!({"metadata":{"public":"y"},"name":"Bob"}));
    }

    #[test]
    fn strip_fields_disabled_passthrough() {
        let raw = r#"{"id":1}"#;
        let mut c = json_content(raw);
        StripFieldsStage.process(&mut c, &disabled_config()).unwrap();
        assert_eq!(c.raw, raw);
    }

    // --- condense ---

    #[test]
    fn condense_collapses_repeated_lines() {
        // Five identical lines with a limit of three: two are dropped.
        let raw = "a\na\na\na\na\nb\n";
        let mut c = text_content(raw);
        let cfg = enabled_config(json!({"max_repeated_lines": 3}));
        CondenseStage.process(&mut c, &cfg).unwrap();
        assert_eq!(c.raw, "a\na\na\nb\n");
    }

    #[test]
    fn condense_keeps_up_to_max() {
        // A run exactly as long as the limit is left alone.
        let raw = "x\nx\nx\n";
        let mut c = text_content(raw);
        let cfg = enabled_config(json!({"max_repeated_lines": 3}));
        CondenseStage.process(&mut c, &cfg).unwrap();
        assert_eq!(c.raw, "x\nx\nx\n");
    }

    #[test]
    fn condense_disabled_passthrough() {
        let raw = "a\na\na\na\n";
        let mut c = text_content(raw);
        CondenseStage.process(&mut c, &disabled_config()).unwrap();
        assert_eq!(c.raw, raw);
    }

    #[test]
    fn condense_skips_json() {
        // Content typed as JSON is outside the stage's scope.
        let raw = r#"{"a":1}"#;
        let mut c = json_content(raw);
        let cfg = enabled_config(json!({"max_repeated_lines": 1}));
        CondenseStage.process(&mut c, &cfg).unwrap();
        assert_eq!(c.raw, raw);
    }

    // --- strip_nulls ---

    #[test]
    fn strip_nulls_removes_null_fields() {
        let mut c = json_content(r#"{"a":1,"b":null,"c":"x"}"#);
        let cfg = enabled_config(json!({}));
        StripNullsStage.process(&mut c, &cfg).unwrap();
        let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
        assert_eq!(v, json!({"a":1,"c":"x"}));
    }

    #[test]
    fn strip_nulls_recursive() {
        let mut c = json_content(r#"{"a":{"b":null,"c":1}}"#);
        let cfg = enabled_config(json!({}));
        StripNullsStage.process(&mut c, &cfg).unwrap();
        let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
        assert_eq!(v, json!({"a":{"c":1}}));
    }

    #[test]
    fn strip_nulls_keeps_null_in_arrays() {
        let mut c = json_content(r#"{"arr":[1,null,2]}"#);
        let cfg = enabled_config(json!({}));
        StripNullsStage.process(&mut c, &cfg).unwrap();
        let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
        assert_eq!(v, json!({"arr":[1,null,2]}));
    }

    #[test]
    fn strip_nulls_disabled_passthrough() {
        let raw = r#"{"a":null}"#;
        let mut c = json_content(raw);
        StripNullsStage.process(&mut c, &disabled_config()).unwrap();
        assert_eq!(c.raw, raw);
    }

    // --- flatten ---

    #[test]
    fn flatten_nested_object() {
        let mut c = json_content(r#"{"a":{"b":{"c":1}}}"#);
        let cfg = enabled_config(json!({"max_depth": 3}));
        FlattenStage.process(&mut c, &cfg).unwrap();
        let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
        assert_eq!(v, json!({"a.b.c":1}));
    }

    #[test]
    fn flatten_respects_max_depth() {
        let mut c = json_content(r#"{"a":{"b":{"c":1}}}"#);
        let cfg = enabled_config(json!({"max_depth": 1}));
        FlattenStage.process(&mut c, &cfg).unwrap();
        let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
        // At max_depth=1, top-level values are not descended into
        assert_eq!(v, json!({"a":{"b":{"c":1}}}));
    }

    #[test]
    fn flatten_disabled_passthrough() {
        let raw = r#"{"a":{"b":1}}"#;
        let mut c = json_content(raw);
        FlattenStage.process(&mut c, &disabled_config()).unwrap();
        assert_eq!(c.raw, raw);
    }

    // --- truncate_strings ---

    #[test]
    fn truncate_strings_long_value() {
        let long = "a".repeat(600);
        let raw = format!(r#"{{"key":"{}"}}"#, long);
        let mut c = json_content(&raw);
        let cfg = enabled_config(json!({"max_length": 500}));
        TruncateStringsStage.process(&mut c, &cfg).unwrap();
        let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
        let s = v["key"].as_str().unwrap();
        assert!(s.ends_with("..."));
        assert_eq!(s.chars().count(), 503); // 500 + "..."
    }

    #[test]
    fn truncate_strings_short_value_unchanged() {
        let raw = r#"{"key":"hello"}"#;
        let mut c = json_content(raw);
        let cfg = enabled_config(json!({"max_length": 500}));
        TruncateStringsStage.process(&mut c, &cfg).unwrap();
        let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
        assert_eq!(v["key"].as_str().unwrap(), "hello");
    }

    #[test]
    fn truncate_strings_disabled_passthrough() {
        let long = "a".repeat(600);
        let raw = format!(r#"{{"key":"{}"}}"#, long);
        let mut c = json_content(&raw);
        TruncateStringsStage.process(&mut c, &disabled_config()).unwrap();
        assert_eq!(c.raw, raw);
    }

    // --- collapse_arrays ---

    #[test]
    fn collapse_arrays_truncates_long_array() {
        let mut c = json_content(r#"{"items":[1,2,3,4,5,6,7]}"#);
        let cfg = enabled_config(json!({
            "max_items": 5,
            "summary_template": "... and {remaining} more items"
        }));
        CollapseArraysStage.process(&mut c, &cfg).unwrap();
        let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
        let arr = v["items"].as_array().unwrap();
        assert_eq!(arr.len(), 6); // 5 kept + 1 summary
        assert_eq!(arr[5].as_str().unwrap(), "... and 2 more items");
    }

    #[test]
    fn collapse_arrays_short_array_unchanged() {
        let raw = r#"{"items":[1,2,3]}"#;
        let mut c = json_content(raw);
        let cfg = enabled_config(json!({"max_items": 5}));
        CollapseArraysStage.process(&mut c, &cfg).unwrap();
        let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
        assert_eq!(v["items"].as_array().unwrap().len(), 3);
    }

    #[test]
    fn collapse_arrays_disabled_passthrough() {
        let raw = r#"{"items":[1,2,3,4,5,6,7]}"#;
        let mut c = json_content(raw);
        CollapseArraysStage.process(&mut c, &disabled_config()).unwrap();
        assert_eq!(c.raw, raw);
    }

    // --- git_diff_fold ---

    #[test]
    fn git_diff_fold_folds_unchanged_lines() {
        // Use a realistic diff with many unchanged context lines
        let diff = concat!(
            "diff --git a/src/main.rs b/src/main.rs\n",
            "--- a/src/main.rs\n",
            "+++ b/src/main.rs\n",
            "@@ -1,12 +1,12 @@\n",
            " line1\n",
            " line2\n",
            " line3\n",
            " line4\n",
            " line5\n",
            " line6\n",
            "-old line\n",
            "+new line\n",
            " line7\n",
            " line8\n",
            " line9\n",
            " line10\n",
            " line11\n",
            " line12\n",
        );
        let mut c = text_content(diff);
        let cfg = enabled_config(serde_json::json!({"max_context_lines": 2}));
        GitDiffFoldStage.process(&mut c, &cfg).unwrap();
        // Changed lines must be preserved
        assert!(c.raw.contains("-old line"), "output: {}", c.raw);
        assert!(c.raw.contains("+new line"), "output: {}", c.raw);
        // Hunk header must be preserved
        assert!(c.raw.contains("@@ -1,12"), "output: {}", c.raw);
        // Output should be shorter (folded lines 1-4 and 9-12)
        assert!(c.raw.len() < diff.len(), "output should be shorter, got:\n{}", c.raw);
        // Should contain fold markers
        assert!(c.raw.contains("unchanged lines"), "expected fold markers in:\n{}", c.raw);
    }

    #[test]
    fn git_diff_fold_preserves_hunk_headers() {
        let diff = "@@ -1,5 +1,5 @@\n unchanged\n-old\n+new\n unchanged\n";
        let mut c = text_content(diff);
        let cfg = enabled_config(serde_json::json!({"max_context_lines": 1}));
        GitDiffFoldStage.process(&mut c, &cfg).unwrap();
        assert!(c.raw.contains("@@ -1,5 +1,5 @@"), "output: {}", c.raw);
    }

    #[test]
    fn git_diff_fold_skips_non_diff_text() {
        // No line starts with '+' or '-', so the stage must not touch the text.
        let raw = "just some plain text\nno diff markers here\n";
        let mut c = text_content(raw);
        let cfg = enabled_config(serde_json::json!({"max_context_lines": 2}));
        GitDiffFoldStage.process(&mut c, &cfg).unwrap();
        assert_eq!(c.raw, raw);
    }

    #[test]
    fn git_diff_fold_disabled_passthrough() {
        let diff = "diff --git a/f b/f\n-old\n+new\n unchanged\n unchanged\n unchanged\n";
        let mut c = text_content(diff);
        GitDiffFoldStage.process(&mut c, &disabled_config()).unwrap();
        assert_eq!(c.raw, diff);
    }

    // --- custom_transforms ---

    #[test]
    fn custom_transforms_is_noop() {
        let raw = r#"{"a":1}"#;
        let mut c = json_content(raw);
        let cfg = enabled_config(json!({}));
        CustomTransformsStage.process(&mut c, &cfg).unwrap();
        assert_eq!(c.raw, raw);
    }

    #[test]
    fn custom_transforms_disabled_passthrough() {
        let raw = "some text";
        let mut c = text_content(raw);
        CustomTransformsStage.process(&mut c, &disabled_config()).unwrap();
        assert_eq!(c.raw, raw);
    }
}