1use crate::error::{Result, SqzError};
2use crate::toon::ToonEncoder;
3use crate::types::{Content, ContentType, StageConfig};
4
5pub trait CompressionStage: Send + Sync {
10 fn name(&self) -> &str;
11 fn priority(&self) -> u32;
12 fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()>;
13}
14
15fn with_json<F>(content: &mut Content, f: F) -> Result<()>
20where
21 F: FnOnce(&mut serde_json::Value) -> Result<()>,
22{
23 if !ToonEncoder::is_json(&content.raw) {
24 return Ok(());
25 }
26 let mut value: serde_json::Value = serde_json::from_str(&content.raw)?;
27 f(&mut value)?;
28 content.raw = serde_json::to_string(&value)?;
29 Ok(())
30}
31
32pub struct KeepFieldsStage;
40
41impl CompressionStage for KeepFieldsStage {
42 fn name(&self) -> &str {
43 "keep_fields"
44 }
45
46 fn priority(&self) -> u32 {
47 10
48 }
49
50 fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
51 if !config.enabled {
52 return Ok(());
53 }
54 let fields: Vec<String> = match config.options.get("fields") {
55 Some(v) => serde_json::from_value(v.clone())
56 .map_err(|e| SqzError::Other(format!("keep_fields: invalid fields option: {e}")))?,
57 None => return Ok(()),
58 };
59 if fields.is_empty() {
60 return Ok(());
61 }
62 with_json(content, |value| {
63 if let serde_json::Value::Object(map) = value {
64 map.retain(|k, _| fields.contains(k));
65 }
66 Ok(())
67 })
68 }
69}
70
/// Stage that removes configured fields (dot-separated paths allowed) from JSON content.
pub struct StripFieldsStage;
80
81fn strip_field_path(value: &mut serde_json::Value, path: &[&str]) {
82 if path.is_empty() {
83 return;
84 }
85 if let serde_json::Value::Object(map) = value {
86 if path.len() == 1 {
87 map.remove(path[0]);
88 } else {
89 if let Some(child) = map.get_mut(path[0]) {
90 strip_field_path(child, &path[1..]);
91 }
92 }
93 }
94}
95
96impl CompressionStage for StripFieldsStage {
97 fn name(&self) -> &str {
98 "strip_fields"
99 }
100
101 fn priority(&self) -> u32 {
102 20
103 }
104
105 fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
106 if !config.enabled {
107 return Ok(());
108 }
109 let fields: Vec<String> = match config.options.get("fields") {
110 Some(v) => serde_json::from_value(v.clone())
111 .map_err(|e| SqzError::Other(format!("strip_fields: invalid fields option: {e}")))?,
112 None => return Ok(()),
113 };
114 if fields.is_empty() {
115 return Ok(());
116 }
117 with_json(content, |value| {
118 for field in &fields {
119 let parts: Vec<&str> = field.split('.').collect();
120 strip_field_path(value, &parts);
121 }
122 Ok(())
123 })
124 }
125}
126
127pub struct CondenseStage;
136
137impl CompressionStage for CondenseStage {
138 fn name(&self) -> &str {
139 "condense"
140 }
141
142 fn priority(&self) -> u32 {
143 30
144 }
145
146 fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
147 if !config.enabled {
148 return Ok(());
149 }
150 match &content.content_type {
152 ContentType::PlainText | ContentType::CliOutput { .. } => {}
153 _ => return Ok(()),
154 }
155
156 let max_repeated: u32 = config
157 .options
158 .get("max_repeated_lines")
159 .and_then(|v| v.as_u64())
160 .map(|v| v as u32)
161 .unwrap_or(3);
162
163 let mut result = Vec::new();
164 let mut current_line: Option<&str> = None;
165 let mut run_count: u32 = 0;
166
167 for line in content.raw.lines() {
168 match current_line {
169 Some(prev) if prev == line => {
170 run_count += 1;
171 if run_count <= max_repeated {
172 result.push(line);
173 }
174 }
175 _ => {
176 current_line = Some(line);
177 run_count = 1;
178 result.push(line);
179 }
180 }
181 }
182
183 let trailing_newline = content.raw.ends_with('\n');
185 content.raw = result.join("\n");
186 if trailing_newline {
187 content.raw.push('\n');
188 }
189 Ok(())
190 }
191}
192
/// Stage that drops `null`-valued members from JSON objects.
pub struct StripNullsStage;
201
202fn strip_nulls_recursive(value: &mut serde_json::Value) {
203 match value {
204 serde_json::Value::Object(map) => {
205 map.retain(|_, v| !v.is_null());
206 for v in map.values_mut() {
207 strip_nulls_recursive(v);
208 }
209 }
210 serde_json::Value::Array(arr) => {
211 for item in arr.iter_mut() {
212 strip_nulls_recursive(item);
213 }
214 }
215 _ => {}
216 }
217}
218
219impl CompressionStage for StripNullsStage {
220 fn name(&self) -> &str {
221 "strip_nulls"
222 }
223
224 fn priority(&self) -> u32 {
225 40
226 }
227
228 fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
229 if !config.enabled {
230 return Ok(());
231 }
232 with_json(content, |value| {
233 strip_nulls_recursive(value);
234 Ok(())
235 })
236 }
237}
238
/// Stage that flattens nested JSON objects into dot-separated top-level keys.
pub struct FlattenStage;
248
249fn flatten_value(
250 value: &serde_json::Value,
251 prefix: &str,
252 depth: u32,
253 max_depth: u32,
254 out: &mut serde_json::Map<String, serde_json::Value>,
255) {
256 if let serde_json::Value::Object(map) = value {
257 if depth < max_depth {
258 for (k, v) in map {
259 let new_key = if prefix.is_empty() {
260 k.clone()
261 } else {
262 format!("{prefix}.{k}")
263 };
264 flatten_value(v, &new_key, depth + 1, max_depth, out);
265 }
266 return;
267 }
268 }
269 out.insert(prefix.to_owned(), value.clone());
270}
271
272impl CompressionStage for FlattenStage {
273 fn name(&self) -> &str {
274 "flatten"
275 }
276
277 fn priority(&self) -> u32 {
278 50
279 }
280
281 fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
282 if !config.enabled {
283 return Ok(());
284 }
285 let max_depth: u32 = config
286 .options
287 .get("max_depth")
288 .and_then(|v| v.as_u64())
289 .map(|v| v as u32)
290 .unwrap_or(3);
291
292 with_json(content, |value| {
293 if let serde_json::Value::Object(map) = value {
294 let mut out = serde_json::Map::new();
295 for (k, v) in map.iter() {
296 flatten_value(v, k, 1, max_depth, &mut out);
297 }
298 *map = out;
299 }
300 Ok(())
301 })
302 }
303}
304
/// Stage that shortens long string values inside JSON content.
pub struct TruncateStringsStage;
314
315fn truncate_strings_recursive(value: &mut serde_json::Value, max_length: usize) {
316 match value {
317 serde_json::Value::String(s) => {
318 if s.chars().count() > max_length {
319 let truncated: String = s.chars().take(max_length).collect();
320 *s = format!("{truncated}...");
321 }
322 }
323 serde_json::Value::Object(map) => {
324 for v in map.values_mut() {
325 truncate_strings_recursive(v, max_length);
326 }
327 }
328 serde_json::Value::Array(arr) => {
329 for item in arr.iter_mut() {
330 truncate_strings_recursive(item, max_length);
331 }
332 }
333 _ => {}
334 }
335}
336
337impl CompressionStage for TruncateStringsStage {
338 fn name(&self) -> &str {
339 "truncate_strings"
340 }
341
342 fn priority(&self) -> u32 {
343 60
344 }
345
346 fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
347 if !config.enabled {
348 return Ok(());
349 }
350 let max_length: usize = config
351 .options
352 .get("max_length")
353 .and_then(|v| v.as_u64())
354 .map(|v| v as usize)
355 .unwrap_or(500);
356
357 with_json(content, |value| {
358 truncate_strings_recursive(value, max_length);
359 Ok(())
360 })
361 }
362}
363
/// Stage that shrinks long JSON arrays, either into a text table or a truncated list.
pub struct CollapseArraysStage;
379
380fn detect_uniform_array(arr: &[serde_json::Value]) -> Option<Vec<String>> {
383 if arr.len() < 2 {
384 return None;
385 }
386
387 let first_keys: Vec<String> = match &arr[0] {
388 serde_json::Value::Object(map) => {
389 if map.is_empty() {
390 return None;
391 }
392 map.keys().cloned().collect()
393 }
394 _ => return None,
395 };
396
397 for item in &arr[1..] {
399 match item {
400 serde_json::Value::Object(map) => {
401 if map.len() != first_keys.len() {
402 return None;
403 }
404 for key in &first_keys {
405 if !map.contains_key(key) {
406 return None;
407 }
408 }
409 }
410 _ => return None,
411 }
412 }
413
414 Some(first_keys)
415}
416
417fn encode_tabular(arr: &[serde_json::Value], keys: &[String]) -> String {
420 let mut lines = Vec::with_capacity(arr.len() + 1);
421
422 lines.push(keys.join(" | "));
424
425 for item in arr {
427 if let serde_json::Value::Object(map) = item {
428 let row: Vec<String> = keys
429 .iter()
430 .map(|k| value_to_compact_string(map.get(k).unwrap_or(&serde_json::Value::Null)))
431 .collect();
432 lines.push(row.join(" | "));
433 }
434 }
435
436 lines.join("\n")
437}
438
439fn value_to_compact_string(v: &serde_json::Value) -> String {
441 match v {
442 serde_json::Value::Null => "null".to_string(),
443 serde_json::Value::Bool(b) => b.to_string(),
444 serde_json::Value::Number(n) => n.to_string(),
445 serde_json::Value::String(s) => {
446 if s.len() > 50 {
447 format!("{}...", &s[..47])
448 } else {
449 s.clone()
450 }
451 }
452 serde_json::Value::Array(a) => format!("[{} items]", a.len()),
453 serde_json::Value::Object(m) => format!("{{{} keys}}", m.len()),
454 }
455}
456
457fn collapse_arrays_recursive(
458 value: &mut serde_json::Value,
459 max_items: usize,
460 summary_template: &str,
461) {
462 match value {
463 serde_json::Value::Array(arr) => {
464 for item in arr.iter_mut() {
466 collapse_arrays_recursive(item, max_items, summary_template);
467 }
468
469 if arr.len() > max_items {
471 if let Some(keys) = detect_uniform_array(arr) {
472 let table = encode_tabular(arr, &keys);
473 let count = arr.len();
474 arr.clear();
475 arr.push(serde_json::Value::String(
476 format!("[table: {count} rows]\n{table}"),
477 ));
478 return;
479 }
480
481 let remaining = arr.len() - max_items;
483 arr.truncate(max_items);
484 let summary = summary_template.replace("{remaining}", &remaining.to_string());
485 arr.push(serde_json::Value::String(summary));
486 }
487 }
488 serde_json::Value::Object(map) => {
489 for v in map.values_mut() {
490 collapse_arrays_recursive(v, max_items, summary_template);
491 }
492 }
493 _ => {}
494 }
495}
496
497impl CompressionStage for CollapseArraysStage {
498 fn name(&self) -> &str {
499 "collapse_arrays"
500 }
501
502 fn priority(&self) -> u32 {
503 70
504 }
505
506 fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
507 if !config.enabled {
508 return Ok(());
509 }
510 let max_items: usize = config
511 .options
512 .get("max_items")
513 .and_then(|v| v.as_u64())
514 .map(|v| v as usize)
515 .unwrap_or(5);
516 let summary_template = config
517 .options
518 .get("summary_template")
519 .and_then(|v| v.as_str())
520 .unwrap_or("... and {remaining} more items")
521 .to_owned();
522
523 with_json(content, |value| {
524 collapse_arrays_recursive(value, max_items, &summary_template);
525 Ok(())
526 })
527 }
528}
529
/// Stage that replaces long, common technical words with short abbreviations in text content.
pub struct WordAbbreviateStage;
540
/// `(long form, abbreviation)` pairs applied, in order, by the word-abbreviation helpers.
///
/// Long forms are lowercase; singular and plural forms are separate entries.
#[rustfmt::skip]
const WORD_ABBREVIATIONS: &[(&str, &str)] = &[
    ("implementation", "impl"), ("implementations", "impls"),
    ("configuration", "config"), ("configurations", "configs"),
    ("authentication", "auth"),
    ("authorization", "authz"),
    ("application", "app"), ("applications", "apps"),
    ("environment", "env"), ("environments", "envs"),
    ("development", "dev"),
    ("production", "prod"),
    ("repository", "repo"), ("repositories", "repos"),
    ("dependency", "dep"), ("dependencies", "deps"),
    ("documentation", "docs"),
    ("information", "info"),
    ("directory", "dir"), ("directories", "dirs"),
    ("parameter", "param"), ("parameters", "params"),
    ("argument", "arg"), ("arguments", "args"),
    ("function", "fn"), ("functions", "fns"),
    ("reference", "ref"), ("references", "refs"),
    ("specification", "spec"), ("specifications", "specs"),
    ("temporary", "tmp"),
    ("administrator", "admin"), ("administrators", "admins"),
    ("database", "db"), ("databases", "dbs"),
    ("message", "msg"), ("messages", "msgs"),
    ("response", "resp"),
    ("request", "req"), ("requests", "reqs"),
    ("attribute", "attr"), ("attributes", "attrs"),
    ("expression", "expr"), ("expressions", "exprs"),
    ("operation", "op"), ("operations", "ops"),
    ("maximum", "max"),
    ("minimum", "min"),
    ("number", "num"),
    ("string", "str"),
    ("boolean", "bool"),
    ("integer", "int"),
    ("previous", "prev"),
    ("current", "curr"),
    ("original", "orig"),
    ("source", "src"),
    ("destination", "dest"),
    ("package", "pkg"), ("packages", "pkgs"),
    ("library", "lib"), ("libraries", "libs"),
    ("executable", "exec"), ("executables", "execs"),
    ("command", "cmd"), ("commands", "cmds"),
    ("variable", "var"), ("variables", "vars"),
    ("certificate", "cert"), ("certificates", "certs"),
    ("synchronize", "sync"),
    ("asynchronous", "async"),
    ("initialize", "init"),
    ("allocation", "alloc"), ("allocations", "allocs"),
    ("generation", "gen"),
    ("miscellaneous", "misc"),
    ("utility", "util"), ("utilities", "utils"),
    ("statistics", "stats"),
    ("connection", "conn"), ("connections", "conns"),
    ("transaction", "txn"), ("transactions", "txns"),
    ("management", "mgmt"),
    ("notification", "notif"), ("notifications", "notifs"),
    ("permission", "perm"), ("permissions", "perms"),
    ("distribution", "distro"), ("distributions", "distros"),
    ("architecture", "arch"),
    ("infrastructure", "infra"),
    ("kubernetes", "k8s"),
    ("namespace", "ns"), ("namespaces", "nses"),
    ("container", "ctr"), ("containers", "ctrs"),
    ("microservice", "svc"), ("microservices", "svcs"),
];
644
645impl CompressionStage for WordAbbreviateStage {
646 fn name(&self) -> &str {
647 "word_abbreviate"
648 }
649
650 fn priority(&self) -> u32 {
651 25 }
653
654 fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
655 if !config.enabled {
656 return Ok(());
657 }
658 match &content.content_type {
660 ContentType::PlainText | ContentType::CliOutput { .. } => {}
661 _ => return Ok(()),
662 }
663
664 let mut result = content.raw.clone();
665 for &(long, short) in WORD_ABBREVIATIONS {
666 result = replace_whole_word(&result, long, short);
669 }
670
671 content.raw = result;
672 Ok(())
673 }
674}
675
676pub fn abbreviate_words(text: &str) -> String {
681 let mut result = text.to_string();
682 for &(long, short) in WORD_ABBREVIATIONS {
683 result = replace_whole_word(&result, long, short);
684 }
685 result
686}
687
/// Replaces whole-word, ASCII-case-insensitive occurrences of `word` in `text`
/// with `replacement`.
///
/// A match counts as a whole word when the bytes immediately before and after
/// it are not ASCII alphanumerics (or the match touches either end of `text`).
/// Empty `text` or `word` returns `text` unchanged.
fn replace_whole_word(text: &str, word: &str, replacement: &str) -> String {
    if text.is_empty() || word.is_empty() {
        return text.to_string();
    }

    // ASCII-only case folding never changes byte lengths, so every offset
    // found in the folded copy is also a valid char boundary in `text`.
    // Full Unicode lowercasing can grow or shrink characters (e.g. U+0130,
    // U+212A), which would misalign the offsets or make the slices panic.
    let haystack = text.to_ascii_lowercase();
    let needle = word.to_ascii_lowercase();
    let bytes = text.as_bytes();

    let mut result = String::with_capacity(text.len());
    let mut last_end = 0;

    for (start, matched) in haystack.match_indices(needle.as_str()) {
        let end = start + matched.len();

        let before_ok = start == 0 || !bytes[start - 1].is_ascii_alphanumeric();
        let after_ok = bytes
            .get(end)
            .map_or(true, |next| !next.is_ascii_alphanumeric());

        if before_ok && after_ok {
            result.push_str(&text[last_end..start]);
            result.push_str(replacement);
            last_end = end;
        }
    }

    result.push_str(&text[last_end..]);
    result
}
723
724pub struct GitDiffFoldStage;
737
738impl CompressionStage for GitDiffFoldStage {
739 fn name(&self) -> &str {
740 "git_diff_fold"
741 }
742
743 fn priority(&self) -> u32 {
744 35
745 }
746
747 fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
748 if !config.enabled {
749 return Ok(());
750 }
751 match &content.content_type {
753 ContentType::PlainText | ContentType::CliOutput { .. } => {}
754 _ => return Ok(()),
755 }
756 if !content.raw.contains("\n+") && !content.raw.contains("\n-") {
758 return Ok(());
759 }
760
761 let max_ctx: usize = config
762 .options
763 .get("max_context_lines")
764 .and_then(|v| v.as_u64())
765 .map(|v| v as usize)
766 .unwrap_or(2);
767
768 let lines: Vec<&str> = content.raw.lines().collect();
769 let n = lines.len();
770
771 let is_changed: Vec<bool> = lines
773 .iter()
774 .map(|l| {
775 l.starts_with('+')
776 || l.starts_with('-')
777 || l.starts_with("@@")
778 || l.starts_with("diff ")
779 || l.starts_with("index ")
780 || l.starts_with("--- ")
781 || l.starts_with("+++ ")
782 })
783 .collect();
784
785 let mut keep = vec![false; n];
787 for i in 0..n {
788 if is_changed[i] {
789 keep[i] = true;
790 for j in i.saturating_sub(max_ctx)..i {
792 keep[j] = true;
793 }
794 for j in (i + 1)..n.min(i + 1 + max_ctx) {
796 keep[j] = true;
797 }
798 }
799 }
800
801 let mut result = Vec::new();
803 let mut fold_count = 0usize;
804
805 for i in 0..n {
806 if keep[i] {
807 if fold_count > 0 {
808 result.push(format!("[{fold_count} unchanged lines]"));
809 fold_count = 0;
810 }
811 result.push(lines[i].to_owned());
812 } else {
813 fold_count += 1;
814 }
815 }
816 if fold_count > 0 {
817 result.push(format!("[{fold_count} unchanged lines]"));
818 }
819
820 let trailing_newline = content.raw.ends_with('\n');
821 content.raw = result.join("\n");
822 if trailing_newline {
823 content.raw.push('\n');
824 }
825 Ok(())
826 }
827}
828
829pub struct CustomTransformsStage;
836
837impl CompressionStage for CustomTransformsStage {
838 fn name(&self) -> &str {
839 "custom_transforms"
840 }
841
842 fn priority(&self) -> u32 {
843 80
844 }
845
846 fn process(&self, _content: &mut Content, config: &StageConfig) -> Result<()> {
847 if !config.enabled {
848 return Ok(());
849 }
850 Ok(())
852 }
853}
854
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{ContentMetadata, ContentType};
    use serde_json::json;

    // ---- fixtures -------------------------------------------------------

    /// Builds a `Content` of the given type with empty metadata.
    fn make_content(raw: &str, content_type: ContentType) -> Content {
        Content {
            raw: raw.to_owned(),
            content_type,
            metadata: ContentMetadata {
                source: None,
                path: None,
                language: None,
            },
            tokens_original: 0,
        }
    }

    fn json_content(raw: &str) -> Content {
        make_content(raw, ContentType::Json)
    }

    fn text_content(raw: &str) -> Content {
        make_content(raw, ContentType::PlainText)
    }

    fn enabled_config(options: serde_json::Value) -> StageConfig {
        StageConfig {
            enabled: true,
            options,
        }
    }

    fn disabled_config() -> StageConfig {
        StageConfig {
            enabled: false,
            options: json!({}),
        }
    }

    /// Runs `stage` over `content` and returns the processed content.
    fn run(stage: &dyn CompressionStage, mut content: Content, cfg: &StageConfig) -> Content {
        stage.process(&mut content, cfg).unwrap();
        content
    }

    /// Parses the processed content back into a JSON value.
    fn parsed(content: &Content) -> serde_json::Value {
        serde_json::from_str(&content.raw).unwrap()
    }

    /// Asserts that `stage` leaves the raw text of `content` untouched.
    fn assert_unchanged(stage: &dyn CompressionStage, content: Content, cfg: &StageConfig) {
        let before = content.raw.clone();
        let after = run(stage, content, cfg);
        assert_eq!(after.raw, before);
    }

    // ---- keep_fields ----------------------------------------------------

    #[test]
    fn keep_fields_retains_specified() {
        let out = run(
            &KeepFieldsStage,
            json_content(r#"{"id":1,"name":"Alice","debug":"x"}"#),
            &enabled_config(json!({"fields": ["id", "name"]})),
        );
        assert_eq!(parsed(&out), json!({"id":1,"name":"Alice"}));
    }

    #[test]
    fn keep_fields_disabled_passthrough() {
        assert_unchanged(
            &KeepFieldsStage,
            json_content(r#"{"id":1,"name":"Alice"}"#),
            &disabled_config(),
        );
    }

    #[test]
    fn keep_fields_non_json_passthrough() {
        assert_unchanged(
            &KeepFieldsStage,
            text_content("not json at all"),
            &enabled_config(json!({"fields": ["id"]})),
        );
    }

    // ---- strip_fields ---------------------------------------------------

    #[test]
    fn strip_fields_removes_top_level() {
        let out = run(
            &StripFieldsStage,
            json_content(r#"{"id":1,"debug":"x","name":"Bob"}"#),
            &enabled_config(json!({"fields": ["debug"]})),
        );
        assert_eq!(parsed(&out), json!({"id":1,"name":"Bob"}));
    }

    #[test]
    fn strip_fields_dot_notation() {
        let out = run(
            &StripFieldsStage,
            json_content(r#"{"metadata":{"internal_id":"x","public":"y"},"name":"Bob"}"#),
            &enabled_config(json!({"fields": ["metadata.internal_id"]})),
        );
        assert_eq!(parsed(&out), json!({"metadata":{"public":"y"},"name":"Bob"}));
    }

    #[test]
    fn strip_fields_disabled_passthrough() {
        assert_unchanged(&StripFieldsStage, json_content(r#"{"id":1}"#), &disabled_config());
    }

    // ---- condense -------------------------------------------------------

    #[test]
    fn condense_collapses_repeated_lines() {
        let out = run(
            &CondenseStage,
            text_content("a\na\na\na\na\nb\n"),
            &enabled_config(json!({"max_repeated_lines": 3})),
        );
        assert_eq!(out.raw, "a\na\na\nb\n");
    }

    #[test]
    fn condense_keeps_up_to_max() {
        let out = run(
            &CondenseStage,
            text_content("x\nx\nx\n"),
            &enabled_config(json!({"max_repeated_lines": 3})),
        );
        assert_eq!(out.raw, "x\nx\nx\n");
    }

    #[test]
    fn condense_disabled_passthrough() {
        assert_unchanged(&CondenseStage, text_content("a\na\na\na\n"), &disabled_config());
    }

    #[test]
    fn condense_skips_json() {
        assert_unchanged(
            &CondenseStage,
            json_content(r#"{"a":1}"#),
            &enabled_config(json!({"max_repeated_lines": 1})),
        );
    }

    // ---- strip_nulls ----------------------------------------------------

    #[test]
    fn strip_nulls_removes_null_fields() {
        let out = run(
            &StripNullsStage,
            json_content(r#"{"a":1,"b":null,"c":"x"}"#),
            &enabled_config(json!({})),
        );
        assert_eq!(parsed(&out), json!({"a":1,"c":"x"}));
    }

    #[test]
    fn strip_nulls_recursive() {
        let out = run(
            &StripNullsStage,
            json_content(r#"{"a":{"b":null,"c":1}}"#),
            &enabled_config(json!({})),
        );
        assert_eq!(parsed(&out), json!({"a":{"c":1}}));
    }

    #[test]
    fn strip_nulls_keeps_null_in_arrays() {
        let out = run(
            &StripNullsStage,
            json_content(r#"{"arr":[1,null,2]}"#),
            &enabled_config(json!({})),
        );
        assert_eq!(parsed(&out), json!({"arr":[1,null,2]}));
    }

    #[test]
    fn strip_nulls_disabled_passthrough() {
        assert_unchanged(&StripNullsStage, json_content(r#"{"a":null}"#), &disabled_config());
    }

    // ---- flatten --------------------------------------------------------

    #[test]
    fn flatten_nested_object() {
        let out = run(
            &FlattenStage,
            json_content(r#"{"a":{"b":{"c":1}}}"#),
            &enabled_config(json!({"max_depth": 3})),
        );
        assert_eq!(parsed(&out), json!({"a.b.c":1}));
    }

    #[test]
    fn flatten_respects_max_depth() {
        let out = run(
            &FlattenStage,
            json_content(r#"{"a":{"b":{"c":1}}}"#),
            &enabled_config(json!({"max_depth": 1})),
        );
        assert_eq!(parsed(&out), json!({"a":{"b":{"c":1}}}));
    }

    #[test]
    fn flatten_disabled_passthrough() {
        assert_unchanged(&FlattenStage, json_content(r#"{"a":{"b":1}}"#), &disabled_config());
    }

    // ---- truncate_strings -----------------------------------------------

    #[test]
    fn truncate_strings_long_value() {
        let raw = format!(r#"{{"key":"{}"}}"#, "a".repeat(600));
        let out = run(
            &TruncateStringsStage,
            json_content(&raw),
            &enabled_config(json!({"max_length": 500})),
        );
        let v = parsed(&out);
        let s = v["key"].as_str().unwrap();
        assert!(s.ends_with("..."));
        assert_eq!(s.chars().count(), 503);
    }

    #[test]
    fn truncate_strings_short_value_unchanged() {
        let out = run(
            &TruncateStringsStage,
            json_content(r#"{"key":"hello"}"#),
            &enabled_config(json!({"max_length": 500})),
        );
        assert_eq!(parsed(&out)["key"].as_str().unwrap(), "hello");
    }

    #[test]
    fn truncate_strings_disabled_passthrough() {
        let raw = format!(r#"{{"key":"{}"}}"#, "a".repeat(600));
        assert_unchanged(&TruncateStringsStage, json_content(&raw), &disabled_config());
    }

    // ---- collapse_arrays ------------------------------------------------

    #[test]
    fn collapse_arrays_truncates_long_array() {
        let out = run(
            &CollapseArraysStage,
            json_content(r#"{"items":[1,2,3,4,5,6,7]}"#),
            &enabled_config(json!({
                "max_items": 5,
                "summary_template": "... and {remaining} more items"
            })),
        );
        let v = parsed(&out);
        let arr = v["items"].as_array().unwrap();
        assert_eq!(arr.len(), 6);
        assert_eq!(arr[5].as_str().unwrap(), "... and 2 more items");
    }

    #[test]
    fn collapse_arrays_short_array_unchanged() {
        let out = run(
            &CollapseArraysStage,
            json_content(r#"{"items":[1,2,3]}"#),
            &enabled_config(json!({"max_items": 5})),
        );
        assert_eq!(parsed(&out)["items"].as_array().unwrap().len(), 3);
    }

    #[test]
    fn collapse_arrays_disabled_passthrough() {
        assert_unchanged(
            &CollapseArraysStage,
            json_content(r#"{"items":[1,2,3,4,5,6,7]}"#),
            &disabled_config(),
        );
    }

    // ---- git_diff_fold --------------------------------------------------

    #[test]
    fn git_diff_fold_folds_unchanged_lines() {
        let diff = concat!(
            "diff --git a/src/main.rs b/src/main.rs\n",
            "--- a/src/main.rs\n",
            "+++ b/src/main.rs\n",
            "@@ -1,12 +1,12 @@\n",
            " line1\n",
            " line2\n",
            " line3\n",
            " line4\n",
            " line5\n",
            " line6\n",
            "-old line\n",
            "+new line\n",
            " line7\n",
            " line8\n",
            " line9\n",
            " line10\n",
            " line11\n",
            " line12\n",
        );
        let c = run(
            &GitDiffFoldStage,
            text_content(diff),
            &enabled_config(serde_json::json!({"max_context_lines": 2})),
        );
        assert!(c.raw.contains("-old line"), "output: {}", c.raw);
        assert!(c.raw.contains("+new line"), "output: {}", c.raw);
        assert!(c.raw.contains("@@ -1,12"), "output: {}", c.raw);
        assert!(c.raw.len() < diff.len(), "output should be shorter, got:\n{}", c.raw);
        assert!(c.raw.contains("unchanged lines"), "expected fold markers in:\n{}", c.raw);
    }

    #[test]
    fn git_diff_fold_preserves_hunk_headers() {
        let c = run(
            &GitDiffFoldStage,
            text_content("@@ -1,5 +1,5 @@\n unchanged\n-old\n+new\n unchanged\n"),
            &enabled_config(serde_json::json!({"max_context_lines": 1})),
        );
        assert!(c.raw.contains("@@ -1,5 +1,5 @@"), "output: {}", c.raw);
    }

    #[test]
    fn git_diff_fold_skips_non_diff_text() {
        assert_unchanged(
            &GitDiffFoldStage,
            text_content("just some plain text\nno diff markers here\n"),
            &enabled_config(serde_json::json!({"max_context_lines": 2})),
        );
    }

    #[test]
    fn git_diff_fold_disabled_passthrough() {
        assert_unchanged(
            &GitDiffFoldStage,
            text_content("diff --git a/f b/f\n-old\n+new\n unchanged\n unchanged\n unchanged\n"),
            &disabled_config(),
        );
    }

    // ---- custom_transforms ----------------------------------------------

    #[test]
    fn custom_transforms_is_noop() {
        assert_unchanged(
            &CustomTransformsStage,
            json_content(r#"{"a":1}"#),
            &enabled_config(json!({})),
        );
    }

    #[test]
    fn custom_transforms_disabled_passthrough() {
        assert_unchanged(&CustomTransformsStage, text_content("some text"), &disabled_config());
    }

    // ---- collapse_arrays: tabular encoding ------------------------------

    #[test]
    fn collapse_arrays_tabular_encoding_uniform_objects() {
        let raw = r#"{"users":[
            {"id":1,"name":"Alice","role":"admin"},
            {"id":2,"name":"Bob","role":"user"},
            {"id":3,"name":"Carol","role":"user"},
            {"id":4,"name":"Dave","role":"admin"},
            {"id":5,"name":"Eve","role":"user"},
            {"id":6,"name":"Frank","role":"user"}
        ]}"#;
        let out = run(
            &CollapseArraysStage,
            json_content(raw),
            &enabled_config(json!({
                "max_items": 3,
                "summary_template": "... and {remaining} more items"
            })),
        );
        let v = parsed(&out);
        let arr = v["users"].as_array().unwrap();
        assert_eq!(arr.len(), 1, "uniform array should be encoded as single table element");
        let table_str = arr[0].as_str().unwrap();
        assert!(table_str.contains("[table: 6 rows]"), "should contain row count: {}", table_str);
        assert!(table_str.contains("Alice"), "should contain data: {}", table_str);
        assert!(table_str.contains("Frank"), "should contain all rows: {}", table_str);
    }

    #[test]
    fn collapse_arrays_mixed_objects_falls_back_to_truncation() {
        let raw = r#"{"items":[
            {"id":1,"name":"Alice"},
            {"x":2,"y":3},
            {"id":3,"name":"Carol"},
            {"x":4,"y":5},
            {"id":5,"name":"Eve"},
            {"x":6,"y":7}
        ]}"#;
        let out = run(
            &CollapseArraysStage,
            json_content(raw),
            &enabled_config(json!({
                "max_items": 3,
                "summary_template": "... and {remaining} more items"
            })),
        );
        let v = parsed(&out);
        let arr = v["items"].as_array().unwrap();
        assert_eq!(arr.len(), 4);
        assert!(arr[3].as_str().unwrap().contains("3 more items"));
    }

    #[test]
    fn collapse_arrays_small_uniform_array_unchanged() {
        let out = run(
            &CollapseArraysStage,
            json_content(r#"{"users":[{"id":1,"name":"Alice"},{"id":2,"name":"Bob"}]}"#),
            &enabled_config(json!({"max_items": 5})),
        );
        assert_eq!(parsed(&out)["users"].as_array().unwrap().len(), 2);
    }

    #[test]
    fn detect_uniform_array_returns_keys_for_uniform() {
        let arr = vec![json!({"a": 1, "b": 2}), json!({"a": 3, "b": 4})];
        let keys = detect_uniform_array(&arr);
        assert!(keys.is_some());
        let keys = keys.unwrap();
        assert!(keys.contains(&"a".to_string()));
        assert!(keys.contains(&"b".to_string()));
    }

    #[test]
    fn detect_uniform_array_returns_none_for_mixed() {
        let arr = vec![json!({"a": 1, "b": 2}), json!({"x": 3, "y": 4})];
        assert!(detect_uniform_array(&arr).is_none());
    }

    #[test]
    fn detect_uniform_array_returns_none_for_non_objects() {
        let arr = vec![json!(1), json!(2), json!(3)];
        assert!(detect_uniform_array(&arr).is_none());
    }

    #[test]
    fn detect_uniform_array_returns_none_for_single_element() {
        let arr = vec![json!({"a": 1})];
        assert!(detect_uniform_array(&arr).is_none());
    }

    #[test]
    fn value_to_compact_string_truncates_long_strings() {
        let v = serde_json::Value::String("a".repeat(100));
        let s = value_to_compact_string(&v);
        assert!(s.len() <= 53);
        assert!(s.ends_with("..."));
    }

    #[test]
    fn value_to_compact_string_short_string_unchanged() {
        let v = serde_json::Value::String("hello".to_string());
        assert_eq!(value_to_compact_string(&v), "hello");
    }

    #[test]
    fn value_to_compact_string_nested_types() {
        assert_eq!(value_to_compact_string(&json!(null)), "null");
        assert_eq!(value_to_compact_string(&json!(true)), "true");
        assert_eq!(value_to_compact_string(&json!(42)), "42");
        assert_eq!(value_to_compact_string(&json!([1, 2, 3])), "[3 items]");
        assert_eq!(value_to_compact_string(&json!({"a": 1})), "{1 keys}");
    }

    // ---- word_abbreviate ------------------------------------------------

    #[test]
    fn word_abbreviate_replaces_known_words() {
        let c = run(
            &WordAbbreviateStage,
            text_content("The implementation of the configuration is complete."),
            &enabled_config(json!({})),
        );
        assert!(c.raw.contains("impl"), "should abbreviate 'implementation': {}", c.raw);
        assert!(c.raw.contains("config"), "should abbreviate 'configuration': {}", c.raw);
        assert!(!c.raw.contains("implementation"), "original word should be gone: {}", c.raw);
    }

    #[test]
    fn word_abbreviate_preserves_partial_matches() {
        let c = run(
            &WordAbbreviateStage,
            text_content("We need to implement this feature."),
            &enabled_config(json!({})),
        );
        assert!(c.raw.contains("implement"), "partial match should be preserved: {}", c.raw);
    }

    #[test]
    fn word_abbreviate_disabled_passthrough() {
        assert_unchanged(
            &WordAbbreviateStage,
            text_content("The implementation is complete."),
            &disabled_config(),
        );
    }

    #[test]
    fn word_abbreviate_skips_json() {
        let raw = r#"{"implementation":"value"}"#;
        let c = run(&WordAbbreviateStage, json_content(raw), &enabled_config(json!({})));
        assert_eq!(c.raw, raw, "JSON content should pass through unchanged");
    }

    #[test]
    fn word_abbreviate_case_insensitive() {
        let c = run(
            &WordAbbreviateStage,
            text_content("The Implementation and CONFIGURATION are ready."),
            &enabled_config(json!({})),
        );
        assert!(c.raw.contains("impl"), "should handle mixed case: {}", c.raw);
        assert!(c.raw.contains("config"), "should handle uppercase: {}", c.raw);
    }

    #[test]
    fn replace_whole_word_basic() {
        assert_eq!(
            replace_whole_word("the implementation is done", "implementation", "impl"),
            "the impl is done"
        );
    }

    #[test]
    fn replace_whole_word_no_partial() {
        let result = replace_whole_word("multiple implementations exist", "implementation", "impl");
        assert_eq!(result, "multiple implementations exist");
    }

    #[test]
    fn replace_whole_word_at_boundaries() {
        assert_eq!(
            replace_whole_word("implementation", "implementation", "impl"),
            "impl"
        );
        assert_eq!(
            replace_whole_word("(implementation)", "implementation", "impl"),
            "(impl)"
        );
    }

    #[test]
    fn replace_whole_word_empty_inputs() {
        assert_eq!(replace_whole_word("", "word", "w"), "");
        assert_eq!(replace_whole_word("text", "", "w"), "text");
    }
}