1use crate::error::{Result, SqzError};
2use crate::toon::ToonEncoder;
3use crate::types::{Content, ContentType, StageConfig};
4
5pub trait CompressionStage: Send + Sync {
10 fn name(&self) -> &str;
11 fn priority(&self) -> u32;
12 fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()>;
13}
14
15fn with_json<F>(content: &mut Content, f: F) -> Result<()>
20where
21 F: FnOnce(&mut serde_json::Value) -> Result<()>,
22{
23 if !ToonEncoder::is_json(&content.raw) {
24 return Ok(());
25 }
26 let mut value: serde_json::Value = serde_json::from_str(&content.raw)?;
27 f(&mut value)?;
28 content.raw = serde_json::to_string(&value)?;
29 Ok(())
30}
31
32pub struct KeepFieldsStage;
40
41impl CompressionStage for KeepFieldsStage {
42 fn name(&self) -> &str {
43 "keep_fields"
44 }
45
46 fn priority(&self) -> u32 {
47 10
48 }
49
50 fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
51 if !config.enabled {
52 return Ok(());
53 }
54 let fields: Vec<String> = match config.options.get("fields") {
55 Some(v) => serde_json::from_value(v.clone())
56 .map_err(|e| SqzError::Other(format!("keep_fields: invalid fields option: {e}")))?,
57 None => return Ok(()),
58 };
59 if fields.is_empty() {
60 return Ok(());
61 }
62 with_json(content, |value| {
63 if let serde_json::Value::Object(map) = value {
64 map.retain(|k, _| fields.contains(k));
65 }
66 Ok(())
67 })
68 }
69}
70
71pub struct StripFieldsStage;
80
81fn strip_field_path(value: &mut serde_json::Value, path: &[&str]) {
82 if path.is_empty() {
83 return;
84 }
85 if let serde_json::Value::Object(map) = value {
86 if path.len() == 1 {
87 map.remove(path[0]);
88 } else {
89 if let Some(child) = map.get_mut(path[0]) {
90 strip_field_path(child, &path[1..]);
91 }
92 }
93 }
94}
95
96impl CompressionStage for StripFieldsStage {
97 fn name(&self) -> &str {
98 "strip_fields"
99 }
100
101 fn priority(&self) -> u32 {
102 20
103 }
104
105 fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
106 if !config.enabled {
107 return Ok(());
108 }
109 let fields: Vec<String> = match config.options.get("fields") {
110 Some(v) => serde_json::from_value(v.clone())
111 .map_err(|e| SqzError::Other(format!("strip_fields: invalid fields option: {e}")))?,
112 None => return Ok(()),
113 };
114 if fields.is_empty() {
115 return Ok(());
116 }
117 with_json(content, |value| {
118 for field in &fields {
119 let parts: Vec<&str> = field.split('.').collect();
120 strip_field_path(value, &parts);
121 }
122 Ok(())
123 })
124 }
125}
126
127pub struct CondenseStage;
136
137impl CompressionStage for CondenseStage {
138 fn name(&self) -> &str {
139 "condense"
140 }
141
142 fn priority(&self) -> u32 {
143 30
144 }
145
146 fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
147 if !config.enabled {
148 return Ok(());
149 }
150 match &content.content_type {
152 ContentType::PlainText | ContentType::CliOutput { .. } => {}
153 _ => return Ok(()),
154 }
155
156 let max_repeated: u32 = config
157 .options
158 .get("max_repeated_lines")
159 .and_then(|v| v.as_u64())
160 .map(|v| v as u32)
161 .unwrap_or(3);
162
163 let mut result = Vec::new();
164 let mut current_line: Option<&str> = None;
165 let mut run_count: u32 = 0;
166
167 for line in content.raw.lines() {
168 match current_line {
169 Some(prev) if prev == line => {
170 run_count += 1;
171 if run_count <= max_repeated {
172 result.push(line);
173 }
174 }
175 _ => {
176 current_line = Some(line);
177 run_count = 1;
178 result.push(line);
179 }
180 }
181 }
182
183 let trailing_newline = content.raw.ends_with('\n');
185 content.raw = result.join("\n");
186 if trailing_newline {
187 content.raw.push('\n');
188 }
189 Ok(())
190 }
191}
192
193pub struct StripNullsStage;
201
202fn strip_nulls_recursive(value: &mut serde_json::Value) {
203 match value {
204 serde_json::Value::Object(map) => {
205 map.retain(|_, v| !v.is_null());
206 for v in map.values_mut() {
207 strip_nulls_recursive(v);
208 }
209 }
210 serde_json::Value::Array(arr) => {
211 for item in arr.iter_mut() {
212 strip_nulls_recursive(item);
213 }
214 }
215 _ => {}
216 }
217}
218
219impl CompressionStage for StripNullsStage {
220 fn name(&self) -> &str {
221 "strip_nulls"
222 }
223
224 fn priority(&self) -> u32 {
225 40
226 }
227
228 fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
229 if !config.enabled {
230 return Ok(());
231 }
232 with_json(content, |value| {
233 strip_nulls_recursive(value);
234 Ok(())
235 })
236 }
237}
238
239pub struct FlattenStage;
248
249fn flatten_value(
250 value: &serde_json::Value,
251 prefix: &str,
252 depth: u32,
253 max_depth: u32,
254 out: &mut serde_json::Map<String, serde_json::Value>,
255) {
256 if let serde_json::Value::Object(map) = value {
257 if depth < max_depth {
258 for (k, v) in map {
259 let new_key = if prefix.is_empty() {
260 k.clone()
261 } else {
262 format!("{prefix}.{k}")
263 };
264 flatten_value(v, &new_key, depth + 1, max_depth, out);
265 }
266 return;
267 }
268 }
269 out.insert(prefix.to_owned(), value.clone());
270}
271
272impl CompressionStage for FlattenStage {
273 fn name(&self) -> &str {
274 "flatten"
275 }
276
277 fn priority(&self) -> u32 {
278 50
279 }
280
281 fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
282 if !config.enabled {
283 return Ok(());
284 }
285 let max_depth: u32 = config
286 .options
287 .get("max_depth")
288 .and_then(|v| v.as_u64())
289 .map(|v| v as u32)
290 .unwrap_or(3);
291
292 with_json(content, |value| {
293 if let serde_json::Value::Object(map) = value {
294 let mut out = serde_json::Map::new();
295 for (k, v) in map.iter() {
296 flatten_value(v, k, 1, max_depth, &mut out);
297 }
298 *map = out;
299 }
300 Ok(())
301 })
302 }
303}
304
305pub struct TruncateStringsStage;
314
315fn truncate_strings_recursive(value: &mut serde_json::Value, max_length: usize) {
316 match value {
317 serde_json::Value::String(s) => {
318 if s.chars().count() > max_length {
319 let truncated: String = s.chars().take(max_length).collect();
320 *s = format!("{truncated}...");
321 }
322 }
323 serde_json::Value::Object(map) => {
324 for v in map.values_mut() {
325 truncate_strings_recursive(v, max_length);
326 }
327 }
328 serde_json::Value::Array(arr) => {
329 for item in arr.iter_mut() {
330 truncate_strings_recursive(item, max_length);
331 }
332 }
333 _ => {}
334 }
335}
336
337impl CompressionStage for TruncateStringsStage {
338 fn name(&self) -> &str {
339 "truncate_strings"
340 }
341
342 fn priority(&self) -> u32 {
343 60
344 }
345
346 fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
347 if !config.enabled {
348 return Ok(());
349 }
350 let max_length: usize = config
351 .options
352 .get("max_length")
353 .and_then(|v| v.as_u64())
354 .map(|v| v as usize)
355 .unwrap_or(500);
356
357 with_json(content, |value| {
358 truncate_strings_recursive(value, max_length);
359 Ok(())
360 })
361 }
362}
363
/// Stage that shrinks long JSON arrays, either into a compact table (uniform
/// arrays of objects) or by truncating them and appending a summary entry.
pub struct CollapseArraysStage;
379
380fn detect_uniform_array(arr: &[serde_json::Value]) -> Option<Vec<String>> {
383 if arr.len() < 2 {
384 return None;
385 }
386
387 let first_keys: Vec<String> = match &arr[0] {
388 serde_json::Value::Object(map) => {
389 if map.is_empty() {
390 return None;
391 }
392 map.keys().cloned().collect()
393 }
394 _ => return None,
395 };
396
397 for item in &arr[1..] {
399 match item {
400 serde_json::Value::Object(map) => {
401 if map.len() != first_keys.len() {
402 return None;
403 }
404 for key in &first_keys {
405 if !map.contains_key(key) {
406 return None;
407 }
408 }
409 }
410 _ => return None,
411 }
412 }
413
414 Some(first_keys)
415}
416
417fn encode_tabular(arr: &[serde_json::Value], keys: &[String]) -> String {
420 let mut lines = Vec::with_capacity(arr.len() + 1);
421
422 lines.push(keys.join(" | "));
424
425 for item in arr {
427 if let serde_json::Value::Object(map) = item {
428 let row: Vec<String> = keys
429 .iter()
430 .map(|k| value_to_compact_string(map.get(k).unwrap_or(&serde_json::Value::Null)))
431 .collect();
432 lines.push(row.join(" | "));
433 }
434 }
435
436 lines.join("\n")
437}
438
439fn value_to_compact_string(v: &serde_json::Value) -> String {
441 match v {
442 serde_json::Value::Null => "null".to_string(),
443 serde_json::Value::Bool(b) => b.to_string(),
444 serde_json::Value::Number(n) => n.to_string(),
445 serde_json::Value::String(s) => {
446 if s.len() > 50 {
447 format!("{}...", &s[..47])
448 } else {
449 s.clone()
450 }
451 }
452 serde_json::Value::Array(a) => format!("[{} items]", a.len()),
453 serde_json::Value::Object(m) => format!("{{{} keys}}", m.len()),
454 }
455}
456
457fn collapse_arrays_recursive(
458 value: &mut serde_json::Value,
459 max_items: usize,
460 summary_template: &str,
461) {
462 match value {
463 serde_json::Value::Array(arr) => {
464 for item in arr.iter_mut() {
466 collapse_arrays_recursive(item, max_items, summary_template);
467 }
468
469 if arr.len() > max_items {
471 if let Some(keys) = detect_uniform_array(arr) {
472 let table = encode_tabular(arr, &keys);
473 let count = arr.len();
474 arr.clear();
475 arr.push(serde_json::Value::String(
476 format!("[table: {count} rows]\n{table}"),
477 ));
478 return;
479 }
480
481 let remaining = arr.len() - max_items;
483 arr.truncate(max_items);
484 let summary = summary_template.replace("{remaining}", &remaining.to_string());
485 arr.push(serde_json::Value::String(summary));
486 }
487 }
488 serde_json::Value::Object(map) => {
489 for v in map.values_mut() {
490 collapse_arrays_recursive(v, max_items, summary_template);
491 }
492 }
493 _ => {}
494 }
495}
496
497impl CompressionStage for CollapseArraysStage {
498 fn name(&self) -> &str {
499 "collapse_arrays"
500 }
501
502 fn priority(&self) -> u32 {
503 70
504 }
505
506 fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
507 if !config.enabled {
508 return Ok(());
509 }
510 let max_items: usize = config
511 .options
512 .get("max_items")
513 .and_then(|v| v.as_u64())
514 .map(|v| v as usize)
515 .unwrap_or(5);
516 let summary_template = config
517 .options
518 .get("summary_template")
519 .and_then(|v| v.as_str())
520 .unwrap_or("... and {remaining} more items")
521 .to_owned();
522
523 with_json(content, |value| {
524 collapse_arrays_recursive(value, max_items, &summary_template);
525 Ok(())
526 })
527 }
528}
529
/// Stage that replaces common long words in plain-text or CLI output with
/// shorter abbreviations.
pub struct WordAbbreviateStage;
540
/// `(long word, abbreviation)` pairs, applied in this order by the word
/// abbreviation code. Singular/plural forms are listed side by side.
#[rustfmt::skip]
const WORD_ABBREVIATIONS: &[(&str, &str)] = &[
    ("implementation", "impl"),     ("implementations", "impls"),
    ("configuration", "config"),    ("configurations", "configs"),
    ("authentication", "auth"),
    ("authorization", "authz"),
    ("application", "app"),         ("applications", "apps"),
    ("environment", "env"),         ("environments", "envs"),
    ("development", "dev"),
    ("production", "prod"),
    ("repository", "repo"),         ("repositories", "repos"),
    ("dependency", "dep"),          ("dependencies", "deps"),
    ("documentation", "docs"),
    ("information", "info"),
    ("directory", "dir"),           ("directories", "dirs"),
    ("parameter", "param"),         ("parameters", "params"),
    ("argument", "arg"),            ("arguments", "args"),
    ("function", "fn"),             ("functions", "fns"),
    ("reference", "ref"),           ("references", "refs"),
    ("specification", "spec"),      ("specifications", "specs"),
    ("temporary", "tmp"),
    ("administrator", "admin"),     ("administrators", "admins"),
    ("database", "db"),             ("databases", "dbs"),
    ("message", "msg"),             ("messages", "msgs"),
    ("response", "resp"),
    ("request", "req"),             ("requests", "reqs"),
    ("attribute", "attr"),          ("attributes", "attrs"),
    ("expression", "expr"),         ("expressions", "exprs"),
    ("operation", "op"),            ("operations", "ops"),
    ("maximum", "max"),
    ("minimum", "min"),
    ("boolean", "bool"),
    ("integer", "int"),
    ("previous", "prev"),
    ("current", "curr"),
    ("original", "orig"),
    ("synchronize", "sync"),
    ("asynchronous", "async"),
    ("initialize", "init"),
    ("allocation", "alloc"),        ("allocations", "allocs"),
    ("generation", "gen"),
    ("miscellaneous", "misc"),
    ("statistics", "stats"),
    ("connection", "conn"),         ("connections", "conns"),
    ("transaction", "txn"),         ("transactions", "txns"),
    ("management", "mgmt"),
    ("notification", "notif"),      ("notifications", "notifs"),
    ("permission", "perm"),         ("permissions", "perms"),
    ("distribution", "distro"),     ("distributions", "distros"),
    ("architecture", "arch"),
    ("infrastructure", "infra"),
    ("kubernetes", "k8s"),
    ("namespace", "ns"),            ("namespaces", "nses"),
    ("container", "ctr"),           ("containers", "ctrs"),
    ("microservice", "svc"),        ("microservices", "svcs"),
];
626
627impl CompressionStage for WordAbbreviateStage {
628 fn name(&self) -> &str {
629 "word_abbreviate"
630 }
631
632 fn priority(&self) -> u32 {
633 25 }
635
636 fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
637 if !config.enabled {
638 return Ok(());
639 }
640 match &content.content_type {
642 ContentType::PlainText | ContentType::CliOutput { .. } => {}
643 _ => return Ok(()),
644 }
645
646 let mut result = content.raw.clone();
647 for &(long, short) in WORD_ABBREVIATIONS {
648 result = replace_whole_word(&result, long, short);
651 }
652
653 content.raw = result;
654 Ok(())
655 }
656}
657
658pub fn abbreviate_words(text: &str) -> String {
663 let mut result = text.to_string();
664 for &(long, short) in WORD_ABBREVIATIONS {
665 result = replace_whole_word(&result, long, short);
666 }
667 result
668}
669
/// Replaces every whole-word occurrence of `word` in `text` with
/// `replacement`, ignoring ASCII case.
///
/// A match counts as a whole word when the bytes immediately before and after
/// it are not ASCII alphanumerics (or the match touches the start/end of the
/// text). The replacement is inserted verbatim; the case of the matched text
/// is not carried over.
///
/// Case folding is ASCII-only on purpose: it never changes byte lengths, so
/// offsets found in the folded copy are valid offsets into `text`. Full
/// Unicode lowercasing can change a character's encoded length, which would
/// shift the offsets and lead to missed matches or slicing panics.
fn replace_whole_word(text: &str, word: &str, replacement: &str) -> String {
    if text.is_empty() || word.is_empty() {
        return text.to_string();
    }

    let folded_text = text.to_ascii_lowercase();
    let folded_word = word.to_ascii_lowercase();
    let bytes = text.as_bytes();

    let mut result = String::with_capacity(text.len());
    let mut last_end = 0;

    for (start, matched) in folded_text.match_indices(folded_word.as_str()) {
        let end = start + matched.len();

        let before_ok = start == 0 || !bytes[start - 1].is_ascii_alphanumeric();
        let after_ok = bytes
            .get(end)
            .map_or(true, |byte| !byte.is_ascii_alphanumeric());

        if before_ok && after_ok {
            // Copy the untouched text since the previous replacement, then
            // the abbreviation in place of the matched word.
            result.push_str(&text[last_end..start]);
            result.push_str(replacement);
            last_end = end;
        }
    }

    result.push_str(&text[last_end..]);
    result
}
705
706pub struct GitDiffFoldStage;
719
720impl CompressionStage for GitDiffFoldStage {
721 fn name(&self) -> &str {
722 "git_diff_fold"
723 }
724
725 fn priority(&self) -> u32 {
726 35
727 }
728
729 fn process(&self, content: &mut Content, config: &StageConfig) -> Result<()> {
730 if !config.enabled {
731 return Ok(());
732 }
733 match &content.content_type {
735 ContentType::PlainText | ContentType::CliOutput { .. } => {}
736 _ => return Ok(()),
737 }
738
739 let looks_like_diff = content.raw.starts_with("diff --git ")
744 || content.raw.starts_with("diff -")
745 || content.raw.contains("\n@@ ") || content.raw.contains("\n--- a/") || content.raw.contains("\n+++ b/"); if !looks_like_diff {
750 return Ok(());
751 }
752
753 let max_ctx: usize = config
754 .options
755 .get("max_context_lines")
756 .and_then(|v| v.as_u64())
757 .map(|v| v as usize)
758 .unwrap_or(2);
759
760 let lines: Vec<&str> = content.raw.lines().collect();
761 let n = lines.len();
762
763 let is_changed: Vec<bool> = lines
765 .iter()
766 .map(|l| {
767 l.starts_with('+')
768 || l.starts_with('-')
769 || l.starts_with("@@")
770 || l.starts_with("diff ")
771 || l.starts_with("index ")
772 || l.starts_with("--- ")
773 || l.starts_with("+++ ")
774 })
775 .collect();
776
777 let mut keep = vec![false; n];
779 for i in 0..n {
780 if is_changed[i] {
781 keep[i] = true;
782 for j in i.saturating_sub(max_ctx)..i {
784 keep[j] = true;
785 }
786 for j in (i + 1)..n.min(i + 1 + max_ctx) {
788 keep[j] = true;
789 }
790 }
791 }
792
793 let mut result = Vec::new();
795 let mut fold_count = 0usize;
796
797 for i in 0..n {
798 if keep[i] {
799 if fold_count > 0 {
800 result.push(format!("[{fold_count} unchanged lines]"));
801 fold_count = 0;
802 }
803 result.push(lines[i].to_owned());
804 } else {
805 fold_count += 1;
806 }
807 }
808 if fold_count > 0 {
809 result.push(format!("[{fold_count} unchanged lines]"));
810 }
811
812 let trailing_newline = content.raw.ends_with('\n');
813 content.raw = result.join("\n");
814 if trailing_newline {
815 content.raw.push('\n');
816 }
817 Ok(())
818 }
819}
820
821pub struct CustomTransformsStage;
828
829impl CompressionStage for CustomTransformsStage {
830 fn name(&self) -> &str {
831 "custom_transforms"
832 }
833
834 fn priority(&self) -> u32 {
835 80
836 }
837
838 fn process(&self, _content: &mut Content, config: &StageConfig) -> Result<()> {
839 if !config.enabled {
840 return Ok(());
841 }
842 Ok(())
844 }
845}
846
847#[cfg(test)]
852mod tests {
853 use super::*;
854 use crate::types::{ContentMetadata, ContentType};
855 use serde_json::json;
856
857 fn json_content(raw: &str) -> Content {
858 Content {
859 raw: raw.to_owned(),
860 content_type: ContentType::Json,
861 metadata: ContentMetadata {
862 source: None,
863 path: None,
864 language: None,
865 },
866 tokens_original: 0,
867 }
868 }
869
870 fn text_content(raw: &str) -> Content {
871 Content {
872 raw: raw.to_owned(),
873 content_type: ContentType::PlainText,
874 metadata: ContentMetadata {
875 source: None,
876 path: None,
877 language: None,
878 },
879 tokens_original: 0,
880 }
881 }
882
883 fn enabled_config(options: serde_json::Value) -> StageConfig {
884 StageConfig {
885 enabled: true,
886 options,
887 }
888 }
889
890 fn disabled_config() -> StageConfig {
891 StageConfig {
892 enabled: false,
893 options: json!({}),
894 }
895 }
896
897 #[test]
900 fn keep_fields_retains_specified() {
901 let mut c = json_content(r#"{"id":1,"name":"Alice","debug":"x"}"#);
902 let cfg = enabled_config(json!({"fields": ["id", "name"]}));
903 KeepFieldsStage.process(&mut c, &cfg).unwrap();
904 let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
905 assert_eq!(v, json!({"id":1,"name":"Alice"}));
906 }
907
908 #[test]
909 fn keep_fields_disabled_passthrough() {
910 let raw = r#"{"id":1,"name":"Alice"}"#;
911 let mut c = json_content(raw);
912 KeepFieldsStage.process(&mut c, &disabled_config()).unwrap();
913 assert_eq!(c.raw, raw);
914 }
915
916 #[test]
917 fn keep_fields_non_json_passthrough() {
918 let raw = "not json at all";
919 let mut c = text_content(raw);
920 let cfg = enabled_config(json!({"fields": ["id"]}));
921 KeepFieldsStage.process(&mut c, &cfg).unwrap();
922 assert_eq!(c.raw, raw);
923 }
924
925 #[test]
928 fn strip_fields_removes_top_level() {
929 let mut c = json_content(r#"{"id":1,"debug":"x","name":"Bob"}"#);
930 let cfg = enabled_config(json!({"fields": ["debug"]}));
931 StripFieldsStage.process(&mut c, &cfg).unwrap();
932 let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
933 assert_eq!(v, json!({"id":1,"name":"Bob"}));
934 }
935
936 #[test]
937 fn strip_fields_dot_notation() {
938 let mut c = json_content(r#"{"metadata":{"internal_id":"x","public":"y"},"name":"Bob"}"#);
939 let cfg = enabled_config(json!({"fields": ["metadata.internal_id"]}));
940 StripFieldsStage.process(&mut c, &cfg).unwrap();
941 let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
942 assert_eq!(v, json!({"metadata":{"public":"y"},"name":"Bob"}));
943 }
944
945 #[test]
946 fn strip_fields_disabled_passthrough() {
947 let raw = r#"{"id":1}"#;
948 let mut c = json_content(raw);
949 StripFieldsStage.process(&mut c, &disabled_config()).unwrap();
950 assert_eq!(c.raw, raw);
951 }
952
953 #[test]
956 fn condense_collapses_repeated_lines() {
957 let raw = "a\na\na\na\na\nb\n";
958 let mut c = text_content(raw);
959 let cfg = enabled_config(json!({"max_repeated_lines": 3}));
960 CondenseStage.process(&mut c, &cfg).unwrap();
961 assert_eq!(c.raw, "a\na\na\nb\n");
962 }
963
964 #[test]
965 fn condense_keeps_up_to_max() {
966 let raw = "x\nx\nx\n";
967 let mut c = text_content(raw);
968 let cfg = enabled_config(json!({"max_repeated_lines": 3}));
969 CondenseStage.process(&mut c, &cfg).unwrap();
970 assert_eq!(c.raw, "x\nx\nx\n");
971 }
972
973 #[test]
974 fn condense_disabled_passthrough() {
975 let raw = "a\na\na\na\n";
976 let mut c = text_content(raw);
977 CondenseStage.process(&mut c, &disabled_config()).unwrap();
978 assert_eq!(c.raw, raw);
979 }
980
981 #[test]
982 fn condense_skips_json() {
983 let raw = r#"{"a":1}"#;
984 let mut c = json_content(raw);
985 let cfg = enabled_config(json!({"max_repeated_lines": 1}));
986 CondenseStage.process(&mut c, &cfg).unwrap();
987 assert_eq!(c.raw, raw);
988 }
989
990 #[test]
993 fn strip_nulls_removes_null_fields() {
994 let mut c = json_content(r#"{"a":1,"b":null,"c":"x"}"#);
995 let cfg = enabled_config(json!({}));
996 StripNullsStage.process(&mut c, &cfg).unwrap();
997 let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
998 assert_eq!(v, json!({"a":1,"c":"x"}));
999 }
1000
1001 #[test]
1002 fn strip_nulls_recursive() {
1003 let mut c = json_content(r#"{"a":{"b":null,"c":1}}"#);
1004 let cfg = enabled_config(json!({}));
1005 StripNullsStage.process(&mut c, &cfg).unwrap();
1006 let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
1007 assert_eq!(v, json!({"a":{"c":1}}));
1008 }
1009
1010 #[test]
1011 fn strip_nulls_keeps_null_in_arrays() {
1012 let mut c = json_content(r#"{"arr":[1,null,2]}"#);
1013 let cfg = enabled_config(json!({}));
1014 StripNullsStage.process(&mut c, &cfg).unwrap();
1015 let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
1016 assert_eq!(v, json!({"arr":[1,null,2]}));
1017 }
1018
1019 #[test]
1020 fn strip_nulls_disabled_passthrough() {
1021 let raw = r#"{"a":null}"#;
1022 let mut c = json_content(raw);
1023 StripNullsStage.process(&mut c, &disabled_config()).unwrap();
1024 assert_eq!(c.raw, raw);
1025 }
1026
1027 #[test]
1030 fn flatten_nested_object() {
1031 let mut c = json_content(r#"{"a":{"b":{"c":1}}}"#);
1032 let cfg = enabled_config(json!({"max_depth": 3}));
1033 FlattenStage.process(&mut c, &cfg).unwrap();
1034 let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
1035 assert_eq!(v, json!({"a.b.c":1}));
1036 }
1037
1038 #[test]
1039 fn flatten_respects_max_depth() {
1040 let mut c = json_content(r#"{"a":{"b":{"c":1}}}"#);
1041 let cfg = enabled_config(json!({"max_depth": 1}));
1042 FlattenStage.process(&mut c, &cfg).unwrap();
1043 let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
1044 assert_eq!(v, json!({"a":{"b":{"c":1}}}));
1046 }
1047
1048 #[test]
1049 fn flatten_disabled_passthrough() {
1050 let raw = r#"{"a":{"b":1}}"#;
1051 let mut c = json_content(raw);
1052 FlattenStage.process(&mut c, &disabled_config()).unwrap();
1053 assert_eq!(c.raw, raw);
1054 }
1055
1056 #[test]
1059 fn truncate_strings_long_value() {
1060 let long = "a".repeat(600);
1061 let raw = format!(r#"{{"key":"{}"}}"#, long);
1062 let mut c = json_content(&raw);
1063 let cfg = enabled_config(json!({"max_length": 500}));
1064 TruncateStringsStage.process(&mut c, &cfg).unwrap();
1065 let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
1066 let s = v["key"].as_str().unwrap();
1067 assert!(s.ends_with("..."));
1068 assert_eq!(s.chars().count(), 503); }
1070
1071 #[test]
1072 fn truncate_strings_short_value_unchanged() {
1073 let raw = r#"{"key":"hello"}"#;
1074 let mut c = json_content(raw);
1075 let cfg = enabled_config(json!({"max_length": 500}));
1076 TruncateStringsStage.process(&mut c, &cfg).unwrap();
1077 let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
1078 assert_eq!(v["key"].as_str().unwrap(), "hello");
1079 }
1080
1081 #[test]
1082 fn truncate_strings_disabled_passthrough() {
1083 let long = "a".repeat(600);
1084 let raw = format!(r#"{{"key":"{}"}}"#, long);
1085 let mut c = json_content(&raw);
1086 TruncateStringsStage.process(&mut c, &disabled_config()).unwrap();
1087 assert_eq!(c.raw, raw);
1088 }
1089
1090 #[test]
1093 fn collapse_arrays_truncates_long_array() {
1094 let mut c = json_content(r#"{"items":[1,2,3,4,5,6,7]}"#);
1095 let cfg = enabled_config(json!({
1096 "max_items": 5,
1097 "summary_template": "... and {remaining} more items"
1098 }));
1099 CollapseArraysStage.process(&mut c, &cfg).unwrap();
1100 let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
1101 let arr = v["items"].as_array().unwrap();
1102 assert_eq!(arr.len(), 6); assert_eq!(arr[5].as_str().unwrap(), "... and 2 more items");
1104 }
1105
1106 #[test]
1107 fn collapse_arrays_short_array_unchanged() {
1108 let raw = r#"{"items":[1,2,3]}"#;
1109 let mut c = json_content(raw);
1110 let cfg = enabled_config(json!({"max_items": 5}));
1111 CollapseArraysStage.process(&mut c, &cfg).unwrap();
1112 let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
1113 assert_eq!(v["items"].as_array().unwrap().len(), 3);
1114 }
1115
1116 #[test]
1117 fn collapse_arrays_disabled_passthrough() {
1118 let raw = r#"{"items":[1,2,3,4,5,6,7]}"#;
1119 let mut c = json_content(raw);
1120 CollapseArraysStage.process(&mut c, &disabled_config()).unwrap();
1121 assert_eq!(c.raw, raw);
1122 }
1123
1124 #[test]
1127 fn git_diff_fold_folds_unchanged_lines() {
1128 let diff = concat!(
1130 "diff --git a/src/main.rs b/src/main.rs\n",
1131 "--- a/src/main.rs\n",
1132 "+++ b/src/main.rs\n",
1133 "@@ -1,12 +1,12 @@\n",
1134 " line1\n",
1135 " line2\n",
1136 " line3\n",
1137 " line4\n",
1138 " line5\n",
1139 " line6\n",
1140 "-old line\n",
1141 "+new line\n",
1142 " line7\n",
1143 " line8\n",
1144 " line9\n",
1145 " line10\n",
1146 " line11\n",
1147 " line12\n",
1148 );
1149 let mut c = text_content(diff);
1150 let cfg = enabled_config(serde_json::json!({"max_context_lines": 2}));
1151 GitDiffFoldStage.process(&mut c, &cfg).unwrap();
1152 assert!(c.raw.contains("-old line"), "output: {}", c.raw);
1154 assert!(c.raw.contains("+new line"), "output: {}", c.raw);
1155 assert!(c.raw.contains("@@ -1,12"), "output: {}", c.raw);
1157 assert!(c.raw.len() < diff.len(), "output should be shorter, got:\n{}", c.raw);
1159 assert!(c.raw.contains("unchanged lines"), "expected fold markers in:\n{}", c.raw);
1161 }
1162
1163 #[test]
1164 fn git_diff_fold_preserves_hunk_headers() {
1165 let diff = "@@ -1,5 +1,5 @@\n unchanged\n-old\n+new\n unchanged\n";
1166 let mut c = text_content(diff);
1167 let cfg = enabled_config(serde_json::json!({"max_context_lines": 1}));
1168 GitDiffFoldStage.process(&mut c, &cfg).unwrap();
1169 assert!(c.raw.contains("@@ -1,5 +1,5 @@"), "output: {}", c.raw);
1170 }
1171
1172 #[test]
1173 fn git_diff_fold_skips_non_diff_text() {
1174 let raw = "just some plain text\nno diff markers here\n";
1175 let mut c = text_content(raw);
1176 let cfg = enabled_config(serde_json::json!({"max_context_lines": 2}));
1177 GitDiffFoldStage.process(&mut c, &cfg).unwrap();
1178 assert_eq!(c.raw, raw);
1179 }
1180
1181 #[test]
1182 fn git_diff_fold_disabled_passthrough() {
1183 let diff = "diff --git a/f b/f\n-old\n+new\n unchanged\n unchanged\n unchanged\n";
1184 let mut c = text_content(diff);
1185 GitDiffFoldStage.process(&mut c, &disabled_config()).unwrap();
1186 assert_eq!(c.raw, diff);
1187 }
1188
1189 #[test]
1197 fn git_diff_fold_does_not_fold_ls_output() {
1198 let ls_output = concat!(
1199 "total 24\n",
1200 "drwxr-xr-x 6 user user 192 Apr 18 10:00 packages\n",
1201 "drwxr-xr-x 3 user user 96 Apr 18 10:00 configuration\n",
1202 "drwxr-xr-x 4 user user 128 Apr 18 10:00 documentation\n",
1203 "drwxr-xr-x 2 user user 64 Apr 18 10:00 environment\n",
1204 "-rw-r--r-- 1 user user 1024 Apr 18 10:00 README.md\n",
1205 );
1206 let mut c = text_content(ls_output);
1207 let cfg = enabled_config(serde_json::json!({"max_context_lines": 2}));
1208 GitDiffFoldStage.process(&mut c, &cfg).unwrap();
1209 assert!(c.raw.contains("packages"), "packages must survive: {}", c.raw);
1211 assert!(c.raw.contains("configuration"), "configuration must survive: {}", c.raw);
1212 assert!(c.raw.contains("documentation"), "documentation must survive: {}", c.raw);
1213 assert!(c.raw.contains("environment"), "environment must survive: {}", c.raw);
1214 assert!(c.raw.contains("README.md"), "README.md must survive: {}", c.raw);
1215 assert!(!c.raw.contains("unchanged lines"), "no folding should occur: {}", c.raw);
1216 }
1217
1218 #[test]
1219 fn git_diff_fold_does_not_fold_markdown_bullets() {
1220 let markdown = concat!(
1221 "# Features\n",
1222 "\n",
1223 "- First feature\n",
1224 "- Second feature\n",
1225 "- Third feature\n",
1226 "+ Added bonus\n",
1227 "\n",
1228 "## Details\n",
1229 );
1230 let mut c = text_content(markdown);
1231 let cfg = enabled_config(serde_json::json!({"max_context_lines": 2}));
1232 GitDiffFoldStage.process(&mut c, &cfg).unwrap();
1233 assert_eq!(c.raw, markdown, "markdown should pass through unchanged");
1234 }
1235
1236 #[test]
1237 fn git_diff_fold_still_works_on_real_diffs() {
1238 let diff = concat!(
1240 "diff --git a/src/main.rs b/src/main.rs\n",
1241 "--- a/src/main.rs\n",
1242 "+++ b/src/main.rs\n",
1243 "@@ -1,10 +1,10 @@\n",
1244 " line1\n",
1245 " line2\n",
1246 " line3\n",
1247 " line4\n",
1248 " line5\n",
1249 "-old line\n",
1250 "+new line\n",
1251 " line6\n",
1252 " line7\n",
1253 " line8\n",
1254 " line9\n",
1255 " line10\n",
1256 );
1257 let mut c = text_content(diff);
1258 let cfg = enabled_config(serde_json::json!({"max_context_lines": 2}));
1259 GitDiffFoldStage.process(&mut c, &cfg).unwrap();
1260 assert!(c.raw.contains("-old line"), "removed line preserved: {}", c.raw);
1262 assert!(c.raw.contains("+new line"), "added line preserved: {}", c.raw);
1263 assert!(c.raw.contains("unchanged lines"), "should fold context: {}", c.raw);
1265 assert!(
1267 c.raw.lines().count() < diff.lines().count(),
1268 "output should have fewer lines: {} vs {}",
1269 c.raw.lines().count(), diff.lines().count()
1270 );
1271 }
1272
1273 #[test]
1276 fn custom_transforms_is_noop() {
1277 let raw = r#"{"a":1}"#;
1278 let mut c = json_content(raw);
1279 let cfg = enabled_config(json!({}));
1280 CustomTransformsStage.process(&mut c, &cfg).unwrap();
1281 assert_eq!(c.raw, raw);
1282 }
1283
1284 #[test]
1285 fn custom_transforms_disabled_passthrough() {
1286 let raw = "some text";
1287 let mut c = text_content(raw);
1288 CustomTransformsStage.process(&mut c, &disabled_config()).unwrap();
1289 assert_eq!(c.raw, raw);
1290 }
1291
1292 #[test]
1295 fn collapse_arrays_tabular_encoding_uniform_objects() {
1296 let raw = r#"{"users":[
1298 {"id":1,"name":"Alice","role":"admin"},
1299 {"id":2,"name":"Bob","role":"user"},
1300 {"id":3,"name":"Carol","role":"user"},
1301 {"id":4,"name":"Dave","role":"admin"},
1302 {"id":5,"name":"Eve","role":"user"},
1303 {"id":6,"name":"Frank","role":"user"}
1304 ]}"#;
1305 let mut c = json_content(raw);
1306 let cfg = enabled_config(json!({
1307 "max_items": 3,
1308 "summary_template": "... and {remaining} more items"
1309 }));
1310 CollapseArraysStage.process(&mut c, &cfg).unwrap();
1311 let v: serde_json::Value = serde_json::from_str(&c.raw).unwrap();
1312 let arr = v["users"].as_array().unwrap();
1313 assert_eq!(arr.len(), 1, "uniform array should be encoded as single table element");
1315 let table_str = arr[0].as_str().unwrap();
1316 assert!(table_str.contains("[table: 6 rows]"), "should contain row count: {}", table_str);
1317 assert!(table_str.contains("Alice"), "should contain data: {}", table_str);
1318 assert!(table_str.contains("Frank"), "should contain all rows: {}", table_str);
1319 }
1320
/// Objects with differing key sets are not table-encoded: with `max_items` set
/// to 3 the six-element array is expected to shrink to three entries followed
/// by one summary string.
#[test]
fn collapse_arrays_mixed_objects_falls_back_to_truncation() {
    let input = r#"{"items":[
        {"id":1,"name":"Alice"},
        {"x":2,"y":3},
        {"id":3,"name":"Carol"},
        {"x":4,"y":5},
        {"id":5,"name":"Eve"},
        {"x":6,"y":7}
    ]}"#;
    let options = json!({
        "max_items": 3,
        "summary_template": "... and {remaining} more items"
    });
    let mut content = json_content(input);
    CollapseArraysStage
        .process(&mut content, &enabled_config(options))
        .unwrap();

    let parsed: serde_json::Value = serde_json::from_str(&content.raw).unwrap();
    let items = parsed["items"].as_array().unwrap();
    assert_eq!(items.len(), 4);

    // The fourth slot holds the rendered summary template.
    let summary = items[3].as_str().unwrap();
    assert!(summary.contains("3 more items"));
}
1344
/// A two-element array under a `max_items` of 5 must keep both of its elements.
#[test]
fn collapse_arrays_small_uniform_array_unchanged() {
    let input = r#"{"users":[{"id":1,"name":"Alice"},{"id":2,"name":"Bob"}]}"#;
    let mut content = json_content(input);
    CollapseArraysStage
        .process(&mut content, &enabled_config(json!({"max_items": 5})))
        .unwrap();

    let parsed: serde_json::Value = serde_json::from_str(&content.raw).unwrap();
    let users = parsed["users"].as_array().unwrap();
    assert_eq!(users.len(), 2);
}
1355
/// Two objects sharing the keys `a` and `b` must be detected as uniform, and
/// the reported key collection must include both names.
#[test]
fn detect_uniform_array_returns_keys_for_uniform() {
    let rows = vec![json!({"a": 1, "b": 2}), json!({"a": 3, "b": 4})];

    let detected = detect_uniform_array(&rows);
    assert!(detected.is_some());

    let keys = detected.unwrap();
    for expected in ["a", "b"].iter() {
        assert!(keys.contains(&expected.to_string()));
    }
}
1368
/// Objects whose key sets differ must not be reported as uniform.
#[test]
fn detect_uniform_array_returns_none_for_mixed() {
    let rows = vec![json!({"a": 1, "b": 2}), json!({"x": 3, "y": 4})];
    let detected = detect_uniform_array(&rows);
    assert!(detected.is_none());
}
1377
/// An array of plain numbers has no keys to share, so detection must yield `None`.
#[test]
fn detect_uniform_array_returns_none_for_non_objects() {
    let numbers = vec![json!(1), json!(2), json!(3)];
    let detected = detect_uniform_array(&numbers);
    assert!(detected.is_none());
}
1383
/// A lone object is not enough for the array to count as uniform.
#[test]
fn detect_uniform_array_returns_none_for_single_element() {
    let single = vec![json!({"a": 1})];
    let detected = detect_uniform_array(&single);
    assert!(detected.is_none());
}
1389
/// A 100-character string value must be shortened: the result may be at most
/// 53 long and has to end with `...`.
#[test]
fn value_to_compact_string_truncates_long_strings() {
    let value = serde_json::Value::String("a".repeat(100));
    let compact = value_to_compact_string(&value);
    assert!(compact.len() <= 53);
    assert!(compact.ends_with("..."));
}
1398
/// A short string value is rendered verbatim.
#[test]
fn value_to_compact_string_short_string_unchanged() {
    let value = serde_json::Value::String(String::from("hello"));
    let compact = value_to_compact_string(&value);
    assert_eq!(compact, "hello");
}
1404
/// Pins the compact rendering of every non-string JSON kind: scalars print as
/// themselves, while arrays and objects are summarised by their element count.
#[test]
fn value_to_compact_string_nested_types() {
    let cases = [
        (json!(null), "null"),
        (json!(true), "true"),
        (json!(42), "42"),
        (json!([1, 2, 3]), "[3 items]"),
        (json!({"a": 1}), "{1 keys}"),
    ];
    for (value, expected) in cases.iter() {
        assert_eq!(value_to_compact_string(value), *expected);
    }
}
1413
/// Words with a known abbreviation are shortened in plain-text content.
///
/// "impl" and "config" are both substrings of the words they abbreviate, so the
/// positive `contains` checks alone would pass even if nothing was replaced.
/// Each one is therefore paired with a check that the long form is gone.
#[test]
fn word_abbreviate_replaces_known_words() {
    let raw = "The implementation of the configuration is complete.";
    let mut c = text_content(raw);
    let cfg = enabled_config(json!({}));
    WordAbbreviateStage.process(&mut c, &cfg).unwrap();
    assert!(c.raw.contains("impl"), "should abbreviate 'implementation': {}", c.raw);
    assert!(c.raw.contains("config"), "should abbreviate 'configuration': {}", c.raw);
    assert!(!c.raw.contains("implementation"), "original word should be gone: {}", c.raw);
    assert!(!c.raw.contains("configuration"), "original word should be gone: {}", c.raw);
}
1426
/// "implement" is only a prefix of an abbreviated word, so it must survive the
/// stage intact.
#[test]
fn word_abbreviate_preserves_partial_matches() {
    let input = "We need to implement this feature.";
    let mut content = text_content(input);
    WordAbbreviateStage
        .process(&mut content, &enabled_config(json!({})))
        .unwrap();
    assert!(
        content.raw.contains("implement"),
        "partial match should be preserved: {}",
        content.raw
    );
}
1436
/// A disabled `WordAbbreviateStage` must return the text unchanged.
#[test]
fn word_abbreviate_disabled_passthrough() {
    let original = "The implementation is complete.";
    let mut content = text_content(original);
    let cfg = disabled_config();
    WordAbbreviateStage.process(&mut content, &cfg).unwrap();
    assert_eq!(content.raw, original);
}
1444
/// JSON content is left alone even when a key matches an abbreviable word.
#[test]
fn word_abbreviate_skips_json() {
    let original = r#"{"implementation":"value"}"#;
    let mut content = json_content(original);
    WordAbbreviateStage
        .process(&mut content, &enabled_config(json!({})))
        .unwrap();
    assert_eq!(content.raw, original, "JSON content should pass through unchanged");
}
1453
/// Capitalised and fully upper-cased spellings are abbreviated too; the output
/// is expected to contain the lowercase short forms.
#[test]
fn word_abbreviate_case_insensitive() {
    let input = "The Implementation and CONFIGURATION are ready.";
    let mut content = text_content(input);
    WordAbbreviateStage
        .process(&mut content, &enabled_config(json!({})))
        .unwrap();

    let output = &content.raw;
    assert!(output.contains("impl"), "should handle mixed case: {}", output);
    assert!(output.contains("config"), "should handle uppercase: {}", output);
}
1463
/// A standalone occurrence in the middle of a sentence is replaced.
#[test]
fn replace_whole_word_basic() {
    let replaced = replace_whole_word("the implementation is done", "implementation", "impl");
    assert_eq!(replaced, "the impl is done");
}
1471
/// The plural "implementations" merely starts with the target word, so the
/// text must come back unmodified.
#[test]
fn replace_whole_word_no_partial() {
    let text = "multiple implementations exist";
    assert_eq!(replace_whole_word(text, "implementation", "impl"), text);
}
1481
/// The word is still replaced when it spans the whole input or is wrapped in
/// punctuation.
#[test]
fn replace_whole_word_at_boundaries() {
    let cases = [
        ("implementation", "impl"),
        ("(implementation)", "(impl)"),
    ];
    for (input, expected) in cases.iter() {
        assert_eq!(replace_whole_word(*input, "implementation", "impl"), *expected);
    }
}
1493
/// Degenerate inputs: an empty text stays empty, and an empty search word
/// leaves the text as it was.
#[test]
fn replace_whole_word_empty_inputs() {
    let from_empty_text = replace_whole_word("", "word", "w");
    assert_eq!(from_empty_text, "");

    let with_empty_word = replace_whole_word("text", "", "w");
    assert_eq!(with_empty_word, "text");
}
1499}