Skip to main content

ics_core/parser/
escape.rs

//! RFC 5545 §3.3.11 TEXT value escape handling.
//!
//! Typed TEXT fields (`SUMMARY`, `DESCRIPTION`, individual `CATEGORIES`
//! items, etc.) carry the following escape sequences on the wire:
//!
//! | wire | meaning |
//! |---|---|
//! | `\\` | backslash |
//! | `\;` | semicolon |
//! | `\,` | comma |
//! | `\n` or `\N` | newline (LF) |
//!
//! On parse we decode; on format we re-encode. Per ADR-018, `RawProperty.value`
//! stays raw — escapes are only interpreted for fields the typed model
//! understands and re-emits.

/// Decode a single RFC 5545 TEXT value from its wire form into a `String`.
///
/// Recognized escapes are replaced by the character they stand for:
/// `\\` → `\`, `\;` → `;`, `\,` → `,`, and `\n` / `\N` → LF.
///
/// A backslash followed by any other character is not a recognized escape;
/// the backslash and that character are both copied to the output unchanged.
/// A lone backslash at the very end of the input is kept as well. Nothing is
/// ever dropped, so unknown or malformed sequences remain visible in the
/// decoded value instead of being silently reinterpreted.
///
/// Note that a preserved backslash is an ordinary character of the decoded
/// string: feeding the result to `encode_text` writes it as `\\`, so the
/// decoded value is stable across encode/decode cycles even though the wire
/// form of an unknown escape is not.
#[must_use]
pub fn decode_text(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(pos) = rest.find('\\') {
        // Everything before the backslash is literal; copy it in one go.
        out.push_str(&rest[..pos]);
        // The backslash is a single byte, so `pos + 1` is a char boundary.
        let mut after = rest[pos + 1..].chars();
        match after.next() {
            Some('\\') => out.push('\\'),
            Some(';') => out.push(';'),
            Some(',') => out.push(','),
            Some('n') | Some('N') => out.push('\n'),
            Some(other) => {
                // Unknown escape: keep both characters verbatim.
                out.push('\\');
                out.push(other);
            }
            // Trailing lone backslash: keep it rather than lose data.
            None => out.push('\\'),
        }
        rest = after.as_str();
    }
    out.push_str(rest);
    out
}

/// Encode a Rust string as an RFC 5545 TEXT value for the wire.
///
/// The reserved characters are escaped so they cannot be mistaken for
/// structure: `\` → `\\`, `;` → `\;`, `,` → `\,`, and LF → `\n`
/// (backslash followed by a lowercase `n`).
///
/// A CR immediately followed by LF is treated as a single line break and is
/// also written as `\n`; otherwise the CR would be emitted raw, and the TEXT
/// grammar does not permit control characters in a value. A CR that is not
/// followed by LF is copied through unchanged.
///
/// All other characters, including non-ASCII text, are copied as-is.
#[must_use]
pub fn encode_text(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        match c {
            '\\' => out.push_str(r"\\"),
            ';' => out.push_str(r"\;"),
            ',' => out.push_str(r"\,"),
            '\n' => out.push_str(r"\n"),
            '\r' if chars.peek() == Some(&'\n') => {
                // Consume the LF so the CRLF pair yields exactly one `\n`.
                chars.next();
                out.push_str(r"\n");
            }
            _ => out.push(c),
        }
    }
    out
}

61/// Split a multi-value TEXT property's raw wire value into individual
62/// decoded items, respecting `\,` as a literal comma inside an item.
63pub fn split_text_list(s: &str) -> Vec<String> {
64    let mut items: Vec<String> = Vec::new();
65    let mut current = String::new();
66    let mut chars = s.chars();
67    while let Some(c) = chars.next() {
68        if c == '\\' {
69            current.push('\\');
70            if let Some(next) = chars.next() {
71                current.push(next);
72            }
73        } else if c == ',' {
74            items.push(decode_text(&current));
75            current.clear();
76        } else {
77            current.push(c);
78        }
79    }
80    items.push(decode_text(&current));
81    items
82}
83
84/// Encode an item list into a single TEXT value, comma-joined with each
85/// item internally escaped.
86pub fn join_text_list(items: &[String]) -> String {
87    items
88        .iter()
89        .map(|s| encode_text(s))
90        .collect::<Vec<_>>()
91        .join(",")
92}
93
/// Unit tests for the TEXT escape helpers: single-value decode/encode,
/// list split/join, and encode/decode stability.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn decode_passes_unescaped_text_through() {
        assert_eq!(decode_text("hello world"), "hello world");
    }

    #[test]
    fn decode_handles_each_recognized_escape() {
        assert_eq!(decode_text(r"a\\b"), r"a\b");
        assert_eq!(decode_text(r"a\;b"), "a;b");
        assert_eq!(decode_text(r"a\,b"), "a,b");
        assert_eq!(decode_text(r"a\nb"), "a\nb");
        assert_eq!(decode_text(r"a\Nb"), "a\nb");
    }

    #[test]
    fn decode_preserves_unknown_escape_sequences() {
        // An unknown \X is kept as the literal two characters rather than
        // being dropped or reinterpreted.
        assert_eq!(decode_text(r"a\Xb"), r"a\Xb");
    }

    #[test]
    fn unknown_escape_survives_encode_decode_cycle() {
        // The preserved backslash is an ordinary character once decoded, so
        // re-encoding doubles it on the wire; the decoded value is stable.
        let decoded = decode_text(r"a\Xb");
        let wire = encode_text(&decoded);
        assert_eq!(wire, r"a\\Xb");
        assert_eq!(decode_text(&wire), decoded);
    }

    #[test]
    fn decode_handles_trailing_lone_backslash() {
        assert_eq!(decode_text(r"abc\"), r"abc\");
    }

    #[test]
    fn encode_escapes_each_reserved_character() {
        assert_eq!(encode_text(r"a\b"), r"a\\b");
        assert_eq!(encode_text("a;b"), r"a\;b");
        assert_eq!(encode_text("a,b"), r"a\,b");
        assert_eq!(encode_text("a\nb"), r"a\nb");
    }

    #[test]
    fn encode_passes_plain_text_through_unchanged() {
        assert_eq!(encode_text("hello 憲法記念日"), "hello 憲法記念日");
    }

    #[test]
    fn decode_then_encode_is_stable_round_trip() {
        let original = r"Meeting, with \;semicolon and \,comma";
        let once = decode_text(original);
        let twice = encode_text(&once);
        let thrice = decode_text(&twice);
        // Once-decoded and twice-decoded values agree — the round-trip
        // through encode is semantically stable.
        assert_eq!(once, thrice);
    }

    #[test]
    fn split_text_list_basic() {
        assert_eq!(
            split_text_list("WORK,PERSONAL,HOLIDAY"),
            vec!["WORK", "PERSONAL", "HOLIDAY"]
        );
    }

    #[test]
    fn split_text_list_respects_escaped_comma() {
        // "a\,b,c" -> ["a,b", "c"]
        assert_eq!(split_text_list(r"a\,b,c"), vec!["a,b", "c"]);
    }

    #[test]
    fn split_text_list_splits_after_escaped_backslash() {
        // In `a\\,b` the backslash pair is consumed first, so the comma is a
        // real separator: items are `a\` and `b`.
        assert_eq!(split_text_list(r"a\\,b"), vec![r"a\", "b"]);
    }

    #[test]
    fn split_text_list_handles_single_item() {
        assert_eq!(split_text_list("solo"), vec!["solo"]);
    }

    #[test]
    fn join_text_list_escapes_per_item() {
        let items = vec!["a,b".to_string(), "c;d".to_string()];
        assert_eq!(join_text_list(&items), r"a\,b,c\;d");
    }

    #[test]
    fn join_text_list_of_empty_slice_is_empty() {
        assert_eq!(join_text_list(&[]), "");
    }

    #[test]
    fn list_round_trip_preserves_items_with_special_chars() {
        let items = vec!["work, project A".to_string(), "personal".to_string()];
        let encoded = join_text_list(&items);
        let decoded = split_text_list(&encoded);
        assert_eq!(decoded, items);
    }
}