quoted_string/
unquote.rs

1use spec::{GeneralQSSpec, ScanAutomaton, PartialCodePoint};
2use error::CoreError;
3use std::borrow::Cow;
4
5/// converts a quoted string into it's content
6///
7/// This methods retrieves the content of a quoted-string, which means it strips the
8/// surrounding `'"'`-quoted, converts quoted-pairs into the values they represent and
9/// strips not-semantic character.
10///
11/// # Example
12/// ```
13/// # use std::borrow::Cow;
14/// //use your own Spec in practise
15/// use quoted_string::test_utils::TestSpec;
16/// use quoted_string::to_content;
17///
18/// let content = to_content::<TestSpec>("\"ab\\\"c\n\nde\"")
19///     .expect("only fails if the input is not a quoted string");
20/// assert_eq!(&*content, "ab\"cde");
21///
22/// let content = to_content::<TestSpec>("\"simple\"").unwrap();
23/// // to content will just use slicing to strip `'"'`-quotes if possible
24/// assert_eq!(content, Cow::Borrowed("simple"));
25/// ```
26///
27pub fn to_content<'a, Spec: GeneralQSSpec>(
28    quoted_string: &'a str
29) -> Result<Cow<'a, str>, CoreError>
30{
31    let mut automaton = ScanAutomaton::<Spec::Parsing>::new();
32    let mut continue_copy_from = None;
33    for (idx, bch) in quoted_string.bytes().enumerate() {
34        let emit = automaton.advance(PartialCodePoint::from_utf8_byte(bch))?;
35        if !emit {
36            continue_copy_from = Some(idx);
37            break;
38        }
39    }
40
41    if let Some(idx) = continue_copy_from {
42        let mut buffer = Vec::with_capacity(quoted_string.len()-2);
43        buffer.extend_from_slice(&quoted_string.as_bytes()[0..idx]);
44
45        //SLICE_SAFE: we slice bytes so it's safe
46        for bch in &quoted_string.as_bytes()[idx+1..] {
47            let emit = automaton.advance(PartialCodePoint::from_utf8_byte(*bch))?;
48            if emit {
49                buffer.push(*bch)
50            }
51        }
52
53        automaton.end()?;
54
55        //OPTIMIZE: find a way to make sure we can't brake utf8 with emit while still being
56        // byte based then use `from_utf8_unchecked`
57        // Way 1: check if bch is > 127 if so make sure following UTF8_CONT's are treated the same.
58        //        - needs special handling for the first non emited byte
59        //        - per iter either a is `> 127` or a `& CONT_MASK == CONT_MASK` which can brake
60        //          branch prediction
61        //        + we can skip calling `automaton.advance(bch)` for all continuation bytes
62        let strfied = String::from_utf8(buffer)
63            .expect("[BUG] automaton caused a code point to be only partially emitted");
64
65        Ok(Cow::Owned(strfied))
66
67    } else {
68        automaton.end()?;
69        let len = quoted_string.len();
70        Ok(Cow::Borrowed(&quoted_string[1..len-1]))
71    }
72
73}
74
/// Strips the surrounding double quotes if they exist.
///
/// Returns `None` if the input does not start with `"` and end with `"`.
/// A lone `"` is only a single character, it can not be the opening and the
/// closing quote at the same time, so `None` is returned for it as well.
///
/// # Example
/// ```
/// use quoted_string::strip_dquotes;
/// assert_eq!(strip_dquotes("\"a b\""), Some("a b"));
/// assert_eq!(strip_dquotes("a b"), None);
/// assert_eq!(strip_dquotes("\"a b"), None);
/// assert_eq!(strip_dquotes("a b\""), None);
/// assert_eq!(strip_dquotes("\""), None);
/// ```
pub fn strip_dquotes(quoted_string: &str) -> Option<&str> {
    let len = quoted_string.len();
    let bytes = quoted_string.as_bytes();
    //SLICE_SAFE: && short circuits if len < 2, so both indices are in bounds and refer
    // to two different bytes; by using bytes there is no problem with utf8 char boundaries
    if len >= 2 && bytes[0] == b'"' && bytes[len-1] == b'"' {
        //SLICE_SAFE: [0] and [len-1] are checked to be '"' (one byte each) and
        // len >= 2 guarantees 1 <= len-1
        Some(&quoted_string[1..len-1])
    } else {
        None
    }
}
99
100
#[cfg(test)]
mod test {

    /// Tests for `to_content`, all of them use `TestSpec` as the spec.
    mod to_content {
        use test_utils::*;
        use error::CoreError;
        use std::borrow::Cow;
        use super::super::to_content;

        /// Wraps `text` into an owned `Cow` to compare results against.
        fn owned(text: &str) -> Cow<'static, str> {
            Cow::Owned(text.to_owned())
        }

        #[test]
        fn no_quotes() {
            assert_eq!(
                to_content::<TestSpec>("noquotes"),
                Err(CoreError::DoesNotStartWithDQuotes)
            );
        }

        #[test]
        fn unnecessary_quoted() {
            let content = to_content::<TestSpec>(r#""simple""#).unwrap();
            assert_eq!(content, Cow::Borrowed("simple"));
        }

        #[test]
        fn quoted_but_no_quoted_pair() {
            let content = to_content::<TestSpec>(r#""abc def""#).unwrap();
            assert_eq!(content, Cow::Borrowed("abc def"));
        }

        #[test]
        fn with_quoted_pair() {
            // the quoted-pair `\"` is expected to turn into a plain `"`
            let content = to_content::<TestSpec>(r#""a\"b""#).unwrap();
            assert_eq!(content, owned(r#"a"b"#));
        }

        #[test]
        fn with_multiple_quoted_pairs() {
            // every quoted-pair is expected to lose its backslash
            let content = to_content::<TestSpec>(r#""a\"\bc\ d""#).unwrap();
            assert_eq!(content, owned(r#"a"bc d"#));
        }

        #[test]
        fn empty() {
            let content = to_content::<TestSpec>(r#""""#).unwrap();
            assert_eq!(content, Cow::Borrowed(""));
        }

        #[test]
        fn strip_non_semantic_ws() {
            // the two newlines are expected to be missing from the content
            let content = to_content::<TestSpec>("\"hy \n\nthere\"").unwrap();
            assert_eq!(content, owned("hy there"));
        }

        #[test]
        fn tailing_escape() {
            assert_eq!(
                to_content::<TestSpec>(r#""ab\""#),
                Err(CoreError::DoesNotEndWithDQuotes)
            );
        }

        #[test]
        fn missing_escape() {
            assert_eq!(
                to_content::<TestSpec>("\"a\"\""),
                Err(CoreError::QuotedStringAlreadyEnded)
            );
        }

        #[test]
        fn custom_state_in_parsing_impl_is_used() {
            // accepted: the `+++---` line is expected to vanish from the content
            let accepted = to_content::<TestSpec>("\"hy \n+++---\nthere\"").unwrap();
            assert_eq!(accepted, owned("hy there"));

            // rejected: `+--` is expected to be reported as an invalid char
            let rejected = to_content::<TestSpec>("\"hy \n+--\nthere\"");
            assert_eq!(rejected, Err(CoreError::InvalidChar));
        }
    }





    /// Tests for `strip_dquotes`.
    mod strip_quotes {
        use super::super::strip_dquotes;

        #[test]
        fn empty_string() {
            let stripped = strip_dquotes("");
            assert!(stripped.is_none());
        }

        #[test]
        fn empty_quoted_string() {
            let stripped = strip_dquotes("\"\"");
            assert_eq!(stripped, Some(""));
        }

        #[test]
        fn missing_quotes() {
            // only one of the two quotes is there, first the opening then the closing one
            let only_opening = strip_dquotes("\"abc");
            assert_eq!(only_opening, None);
            let only_closing = strip_dquotes("abc\"");
            assert_eq!(only_closing, None);
        }

        #[test]
        fn simple_string() {
            let stripped = strip_dquotes("\"simple\"");
            assert_eq!(stripped, Some("simple"));
        }
    }

}