dysql_tpl/
encoding.rs

1// Ramhorns  Copyright (C) 2019  Maciej Hirsz
2//
3// This file is part of Ramhorns. This program comes with ABSOLUTELY NO WARRANTY;
4// This is free software, and you are welcome to redistribute it under the
5// conditions of the GNU General Public License version 3.0.
6//
7// You should have received a copy of the GNU General Public License
8// along with Ramhorns.  If not, see <http://www.gnu.org/licenses/>
9
10//! Utilities dealing with writing the bits of a template or data to the output and
11//! escaping special HTML characters.
12
13use std::io;
14use std::fmt;
15
16#[cfg(feature = "pulldown-cmark")]
17use pulldown_cmark::{html, Event, Parser};
18
19use crate::SimpleError;
20use crate::SimpleInnerError;
21
/// Output sink used when rendering: implemented in this file for `String` and
/// for wrappers around `std::io::Write`. Text can be written verbatim or with
/// special HTML characters escaped.
pub trait Encoder {
    /// Error reported when writing to the underlying sink fails.
    type Error;

    /// Append `part` to this `Encoder` exactly as given.
    fn write_unescaped(&mut self, part: &str) -> Result<(), Self::Error>;

    /// Append `part` to this `Encoder`, escaping special HTML characters.
    fn write_escaped(&mut self, part: &str) -> Result<(), Self::Error>;

    /// Render an `Iterator` of `pulldown_cmark` `Event`s as HTML into this `Encoder`.
    #[cfg(feature = "pulldown-cmark")]
    fn write_html<'b, I>(&mut self, iter: I) -> Result<(), Self::Error>
    where
        I: Iterator<Item = Event<'b>>;

    /// Format a `Display` implementor into this `Encoder` without escaping.
    fn format_unescaped<D>(&mut self, display: D) -> Result<(), Self::Error>
    where
        D: fmt::Display;

    /// Format a `Display` implementor into this `Encoder`, escaping special
    /// HTML characters in the formatted output.
    fn format_escaped<D>(&mut self, display: D) -> Result<(), Self::Error>
    where
        D: fmt::Display;
}
44
/// Private adapter that appends to a borrowed `String`, escaping special HTML
/// characters on the way in.
struct EscapingStringEncoder<'a>(&'a mut String);

impl<'a> EscapingStringEncoder<'a> {
    /// Return the HTML entity for `byte`, or `None` when the byte is written
    /// through unchanged.
    fn entity_for(byte: u8) -> Option<&'static str> {
        match byte {
            b'<' => Some("&lt;"),
            b'>' => Some("&gt;"),
            b'&' => Some("&amp;"),
            b'"' => Some("&quot;"),
            _ => None,
        }
    }

    /// Append `part` with `<`, `>`, `&` and `"` replaced by their entities.
    /// Pushing onto a `String` cannot fail, hence no `Result`.
    fn write_escaped(&mut self, part: &str) {
        // Start of the run of plain text that has not been copied yet.
        let mut pending = 0;

        let specials = part
            .bytes()
            .enumerate()
            .filter_map(|(pos, byte)| Self::entity_for(byte).map(|entity| (pos, entity)));

        for (pos, entity) in specials {
            // All escaped bytes are ASCII, so `pos` is always a char boundary.
            self.0.push_str(&part[pending..pos]);
            self.0.push_str(entity);
            pending = pos + 1;
        }

        self.0.push_str(&part[pending..]);
    }
}

/// `fmt::Write` support so the `write!` macro can format straight into the
/// escaping adapter.
impl<'a> fmt::Write for EscapingStringEncoder<'a> {
    #[inline]
    fn write_str(&mut self, part: &str) -> fmt::Result {
        self.write_escaped(part);
        Ok(())
    }
}
82
/// Encoder wrapper around io::Write. We can't implement `Encoder` on a generic here,
/// because we're implementing it directly for `String`.
pub(crate) struct EscapingIOEncoder<W: io::Write> {
    /// Destination that receives the (possibly escaped) bytes.
    inner: W,
}

impl<W: io::Write> EscapingIOEncoder<W> {
    /// Wrap `inner` so that output can be escaped before reaching it.
    #[inline]
    pub fn new(inner: W) -> Self {
        Self { inner }
    }

    /// Write `part` to the inner `io::Write`, replacing `<`, `>`, `&` and `"`
    /// with their HTML entities. Works on raw bytes; only ASCII bytes are ever
    /// replaced, so multi-byte UTF-8 sequences pass through untouched.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by the inner writer.
    fn write_escaped_bytes(&mut self, part: &[u8]) -> io::Result<()> {
        // Start of the run of plain bytes that has not been written yet.
        let mut start = 0;

        for (idx, byte) in part.iter().enumerate() {
            let replace: &[u8] = match *byte {
                b'<' => b"&lt;",
                b'>' => b"&gt;",
                b'&' => b"&amp;",
                b'"' => b"&quot;",
                _ => continue,
            };

            self.inner.write_all(&part[start..idx])?;
            self.inner.write_all(replace)?;

            start = idx + 1;
        }

        self.inner.write_all(&part[start..])
    }
}

// Additionally we implement `io::Write` for it directly. This allows us to use
// the `write!` macro for formatting without allocations.
impl<W: io::Write> io::Write for EscapingIOEncoder<W> {
    /// Escape and write all of `part`. On success the whole input is reported
    /// as consumed (`part.len()`), independent of how many bytes the escaped
    /// form occupied.
    #[inline]
    fn write(&mut self, part: &[u8]) -> io::Result<usize> {
        self.write_escaped_bytes(part).map(|()| part.len())
    }

    /// Escape and write all of `part`.
    #[inline]
    fn write_all(&mut self, part: &[u8]) -> io::Result<()> {
        self.write_escaped_bytes(part)
    }

    /// Flush the wrapped writer. This wrapper buffers nothing itself, but the
    /// inner writer may, so the request has to be forwarded rather than
    /// silently reported as successful.
    #[inline]
    fn flush(&mut self) -> io::Result<()> {
        self.inner.flush()
    }
}
139
140impl<W: io::Write> Encoder for EscapingIOEncoder<W> {
141    type Error = io::Error;
142
143    #[inline]
144    fn write_unescaped(&mut self, part: &str) -> io::Result<()> {
145        self.inner.write_all(part.as_bytes())
146    }
147
148    #[inline]
149    fn write_escaped(&mut self, part: &str) -> io::Result<()> {
150        self.write_escaped_bytes(part.as_bytes())
151    }
152
153    #[cfg(feature = "pulldown-cmark")]
154    #[inline]
155    fn write_html<'b, I: Iterator<Item = Event<'b>>>(&mut self, iter: I) -> io::Result<()> {
156        html::write_html(&mut self.inner, iter)
157    }
158
159    #[inline]
160    fn format_unescaped<D: fmt::Display>(&mut self, display: D) -> Result<(), Self::Error> {
161        write!(self.inner, "{}", display)
162    }
163
164    #[inline]
165    fn format_escaped<D: fmt::Display>(&mut self, display: D) -> Result<(), Self::Error> {
166        use io::Write;
167
168        write!(self, "{}", display)
169    }
170}
171
/// Error type for the `String` encoder. It has no variants, so it is
/// impossible to instantiate.
/// Rust optimizes `Result<(), NeverError>` to 0-size.
pub enum NeverError {}
175
176impl Encoder for String {
177    // Change this to `!` once stabilized.
178    type Error = NeverError;
179
180    #[inline]
181    fn write_unescaped(&mut self, part: &str) -> Result<(), Self::Error> {
182        self.push_str(part);
183
184        Ok(())
185    }
186
187    #[inline]
188    fn write_escaped(&mut self, part: &str) -> Result<(), Self::Error> {
189        EscapingStringEncoder(self).write_escaped(part);
190
191        Ok(())
192    }
193
194    #[cfg(feature = "pulldown-cmark")]
195    #[inline]
196    fn write_html<'b, I: Iterator<Item = Event<'b>>>(&mut self, iter: I) -> Result<(), Self::Error> {
197        html::push_html(self, iter);
198
199        Ok(())
200    }
201
202    #[inline]
203    fn format_unescaped<D: fmt::Display>(&mut self, display: D) -> Result<(), Self::Error> {
204        use std::fmt::Write;
205
206        // Never fails for a string
207        let _ = write!(self, "{}", display);
208
209        Ok(())
210    }
211
212    #[inline]
213    fn format_escaped<D: fmt::Display>(&mut self, display: D) -> Result<(), Self::Error> {
214        use std::fmt::Write;
215
216        // Never fails for a string
217        let _ = write!(EscapingStringEncoder(self), "{}", display);
218
219        Ok(())
220    }
221}
222
/// Parse `source` as markdown with `pulldown_cmark` and write the resulting
/// HTML to `encoder`.
#[cfg(feature = "pulldown-cmark")]
pub fn encode_cmark<E: Encoder>(source: &str, encoder: &mut E) -> Result<(), E::Error> {
    encoder.write_html(Parser::new(source))
}
230
/// A piece of SQL text as returned by `get_token`.
enum Token<'a> {
    // (len including the control markers, token str)
    /// A `![DEL(xxx)]` directive: the length of the whole directive and the
    /// text between the parentheses.
    DEL(usize, &'a str),
    /// An ordinary token: its length and its text.
    Normal(usize, &'a str),
}
236
/// Bytes treated as blanks when splitting SQL text into tokens: space,
/// newline and tab.
const BLANKET_CHARS: [u8; 3] = [b' ', b'\n', b'\t'];
238
239pub(crate) struct SqlEncoder
240{
241    pub inner: String,
242    pub trim_token: Option<String>,
243}
244
245impl SqlEncoder {
246    pub fn with_capacity(capacity: usize) -> Self {
247        Self {
248            inner: String::with_capacity(capacity),
249            trim_token: None,
250        }
251    }
252
253    pub fn trim(mut self) -> String {
254        if self.inner.len() > 0 {
255            self.inner.pop();
256        }
257        self.inner
258    }
259
260    fn trim_sql(&mut self, sql: &str) -> Result<(), SimpleError>
261    {
262        let sql_buf = &mut self.inner;
263        let mut end: usize = 0; // 当前位置
264        let sql_len: usize = sql.len();
265        // let mut trim_token: Option<&str> = None;
266    
267        while end < sql_len { 
268            if let Some(idx) = skip_blank(&sql[end..sql_len]) {
269                end += idx;
270            } else {
271                break;
272            }
273            
274            let sql_token = get_token(&sql[end..sql_len])?;
275            let token_len = match sql_token {
276                // 如果是 DEL 控制符,则记录需要 trim 的 token,在下一次写入 sql_buf 时过滤字符串
277                Token::DEL(len, token) => {
278                    self.trim_token = Some(token.to_owned());
279                    len
280                }
281                // 需要输出的 sql token 如果开始位置有需要 DEL 的 token,则跳过此 token 写入 sql_buf,
282                // 写入后重置 trim_token 为 None.
283                Token::Normal(len, token) => {
284                    if let Some(tm_token) = &self.trim_token {
285                        let trim_len = tm_token.len();
286                        if trim_len <= token.len() && tm_token == &token[0..trim_len] {
287                            if trim_len < token.len() {
288                                sql_buf.push_str(&token[trim_len..]);
289                                sql_buf.push_str(" ");
290                            } else {
291                                sql_buf.push_str(&token[trim_len..]);
292                            }
293                        } else {
294                            sql_buf.push_str(token);
295                            sql_buf.push_str(" ");
296                        }
297                    } else {
298                        sql_buf.push_str(token);
299                        sql_buf.push_str(" ");
300                    }
301                    self.trim_token = None;
302    
303                    len
304                }
305            };
306    
307            end += token_len;
308        }
309
310        Ok(())
311    }
312}
313
314impl  Encoder for SqlEncoder  {
315    // Change this to `!` once stabilized.
316    type Error = SimpleError;
317
318    #[inline]
319    fn write_unescaped(&mut self, part: &str) -> Result<(), Self::Error> {
320        
321        self.trim_sql(part)?;
322
323        // println!("unescaped | bf:{}/ af:{}/", part, self.inner);
324
325        Ok(())
326    }
327
328    #[inline]
329    fn write_escaped(&mut self, part: &str) -> Result<(), Self::Error> {
330        EscapingStringEncoder(&mut self.inner).write_escaped(part);
331        self.inner.push_str(" ");
332        // println!("escaped | bf:{}/ af:{}/", part, self.inner);
333
334        Ok(())
335    }
336
337    #[cfg(feature = "pulldown-cmark")]
338    #[inline]
339    fn write_html<'b, I: Iterator<Item = Event<'b>>>(&mut self, iter: I) -> Result<(), Self::Error> {
340        html::push_html(&mut self.inner, iter);
341        // println!("aaaaaaaaa");
342
343        Ok(())
344    }
345
346    #[inline]
347    fn format_unescaped<D: fmt::Display>(&mut self, display: D) -> Result<(), Self::Error> {
348        use std::fmt::Write;
349
350        // Never fails for a string
351        let _ = write!(&mut self.inner, "{} ", display);
352        // println!("bbbbbbb");
353
354        Ok(())
355    }
356
357    #[inline]
358    fn format_escaped<D: fmt::Display>(&mut self, display: D) -> Result<(), Self::Error> {
359        use std::fmt::Write;
360
361        // Never fails for a string
362        let _ = write!(EscapingStringEncoder(&mut self.inner), "{} ", display);
363        // println!("cccccc");
364
365        Ok(())
366    }
367}
368
369
370
371/// 跳过空白字符,
372/// 遇到非空白字符时返回 Some(跳过的字符数),
373/// 遇到结尾时返回 None
374fn skip_blank(s: &str) -> Option<usize> {
375    let mut current_idx = 0;
376    let slen = s.len();
377
378    while current_idx < slen {
379        let c = char_at(s, current_idx);
380        let is_not_blank = BLANKET_CHARS.iter().all(|b| *b != c);
381        if is_not_blank { break }
382
383        current_idx += 1;
384    }
385
386    if current_idx < slen {
387        Some(current_idx)
388    } else {
389        None
390    }
391}
392
/// Return the byte of `s` at byte offset `idx`.
///
/// Reads from the byte view of the string, so `idx` may point anywhere inside
/// a multi-byte UTF-8 sequence; slicing the `str` itself would panic there.
///
/// # Panics
///
/// Panics if `idx >= s.len()`.
#[inline]
fn char_at(s: &str, idx: usize) -> u8 {
    s.as_bytes()[idx]
}
397
398/// stop at blank or end
399fn get_token(s: &str) -> Result<Token, SimpleError> {
400    let mut current_idx = 0;
401    let slen = s.len();
402
403    // ![DEL(xxx)] 处理
404    if s.len() >= 6 && "![DEL(" == &s[0..6] {
405        current_idx += 6;
406        let mut has_end = false;
407        while current_idx < slen {
408            let c = char_at(s, current_idx);
409            let is_blank = BLANKET_CHARS.iter().any(|&b| b == c);
410            current_idx += 1;
411
412            if is_blank { 
413                break 
414            } else if c == b')' {
415                let c = char_at(&s[current_idx..], 0);
416                if c == b']' {
417                    has_end = true;
418                    current_idx += 1;
419                    break
420                }
421            }
422        }
423
424        if has_end {
425            let token = &s[6..current_idx - 2];
426            return Ok(Token::DEL(current_idx, token))
427        } else {
428            Err(SimpleInnerError(" '![DEL(..)' syntax error".to_owned()))?
429        }
430    } else {
431        while current_idx < slen {
432            let c = char_at(s, current_idx);
433            let is_blank = BLANKET_CHARS.iter().any(|&b| b == c);
434            if is_blank { 
435                break 
436            }
437            current_idx += 1;
438        }
439        let token = &s[0..current_idx];
440        return Ok(Token::Normal(current_idx, token))
441    }
442}