vue_compiler_core/util/
v_str.rs

//! There is still a lot we can optimize in VStr:
//! * instead of using &str, we can use interning to cache static attr names.
//! * we can also cache camelize/capitalize results.
//! * if the raw VStr already satisfies a StrOps, setting the ops flag is a noop.
//! * interning/caching can be optional, e.g. a Text token can skip it entirely.
6use super::{is_event_prop, non_whitespace, not_js_identifier};
7use bitflags::bitflags;
8use std::{
9    io::{self, Write},
10    ops::Deref,
11};
12
13bitflags! {
14    /// Represents string manipulation. It has two categories:
15    /// 1. IDEMPOTENT_OPS and 2. AFFINE_OPS,
16    /// depending on whether the manipulation is idempotent or not
17    /// NB strops is order sensitive when it is cast to string.
18    #[derive(Default)]
19    pub struct StrOps: u16 {
20        const HANDLER_KEY         = 1 << 0;
21        const VALID_DIR           = 1 << 1;
22        const VALID_COMP          = 1 << 2;
23        const V_DIR_PREFIX        = 1 << 3;
24        const COMPRESS_WHITESPACE = 1 << 4;
25        const DECODE_ENTITY       = 1 << 5;
26        const CAMEL_CASE          = 1 << 6;
27        const CAPITALIZED         = 1 << 7;
28        const JS_STRING           = 1 << 8;
29        const CTX_PREFIX          = 1 << 9;
30        // marker op is placed at the end
31        const SELF_SUFFIX         = 1 << 10;
32        const IS_ATTR             = 1 << 11;
33        /// Ops that can be safely carried out multiple times
34        const IDEMPOTENT_OPS =
35            Self::COMPRESS_WHITESPACE.bits | Self::DECODE_ENTITY.bits |
36            Self::CAMEL_CASE.bits | Self::CAPITALIZED.bits | Self::IS_ATTR.bits;
37        /// Ops that can only be performed at most once. Name comes from
38        /// https://en.wikipedia.org/wiki/Substructural_type_system
39        const AFFINE_OPS =
40            Self::HANDLER_KEY.bits | Self::VALID_DIR.bits | Self::VALID_COMP.bits |
41            Self::SELF_SUFFIX.bits | Self::V_DIR_PREFIX.bits | Self::JS_STRING.bits |
42            Self::CTX_PREFIX.bits;
43        /// Ops that mark the string is an hoisted asset
44        const ASSET_OPS = Self::VALID_DIR.bits | Self::VALID_COMP.bits |
45            Self::SELF_SUFFIX.bits;
46    }
47}
48
// NB: JS `\w` is the ASCII word class `[A-Za-z0-9_]`.
/// Writes `s` to `w` in camelCase.
///
/// Mirrors the JS expression quoted in the body: every `-` that is directly
/// followed by a word character is dropped and that character is written in
/// ASCII upper case. A `-` followed by anything else (another `-`, a
/// non-ASCII character, the end of input) is written unchanged.
///
/// Returns any error reported by the underlying writer.
fn write_camelized<W: Write>(s: &str, mut w: W) -> io::Result<()> {
    // str.replace(/-(\w)/g, (_, c) => c.toUpperCase())
    let mut pending_minus = false;
    for c in s.chars() {
        // `_` is part of `\w`; upper-casing it is a no-op but the `-` is consumed.
        let is_word = c.is_ascii_alphanumeric() || c == '_';
        if pending_minus && is_word {
            write!(w, "{}", c.to_ascii_uppercase())?;
            pending_minus = false;
            continue;
        }
        // the previous `-` did not start a match: flush it
        if pending_minus {
            w.write_all(b"-")?;
        }
        // hold back a `-` until we know what follows it
        pending_minus = c == '-';
        if !pending_minus {
            write!(w, "{}", c)?;
        }
    }
    // a trailing `-` can never match
    if pending_minus {
        w.write_all(b"-")?;
    }
    Ok(())
}
/// Writes `s` to `w` with its first character upper-cased.
///
/// The first character goes through `char::to_uppercase`, so it may expand
/// to several characters; the remainder of `s` is copied verbatim.
/// An empty input writes nothing.
fn write_capitalized<W: Write>(s: &str, mut w: W) -> io::Result<()> {
    let mut rest = s.chars();
    match rest.next() {
        None => Ok(()),
        Some(first) => {
            write!(w, "{}", first.to_uppercase())?;
            // `as_str` yields everything after the character just consumed
            w.write_all(rest.as_str().as_bytes())
        }
    }
}
83
/// Writes `s` to `w` in hyphenated (kebab) form.
///
/// Mirrors the JS expression quoted in the body: a `-` is inserted before
/// every ASCII upper-case letter that directly follows a word character
/// (`[A-Za-z0-9_]`), and the whole output is lower-cased.
///
/// Returns any error reported by the underlying writer.
fn write_hyphenated<W: Write>(s: &str, mut w: W) -> io::Result<()> {
    // https://javascript.info/regexp-boundary
    // str.replace(/\B([A-Z])/g, '-$1').toLowerCase()
    //
    // `[A-Z]` is itself a word character, so `\B` in front of it holds
    // exactly when the previous character is a word character too. The start
    // of input counts as a non-word position.
    let mut prev_is_word = false;
    for c in s.chars() {
        if prev_is_word && c.is_ascii_uppercase() {
            w.write_all(b"-")?;
        }
        // `.toLowerCase()` applies to every character, not only to the ones
        // that received a hyphen.
        write!(w, "{}", c.to_lowercase())?;
        prev_is_word = c.is_ascii_alphanumeric() || c == '_';
    }
    Ok(())
}
100
101fn write_json_string<W: Write>(s: &str, w: &mut W) -> io::Result<()> {
102    use json::codegen::{Generator, WriterGenerator};
103    let mut gen = WriterGenerator::new(w);
104    gen.write_string(s)
105}
106
107/// compress consecutive whitespaces into one.
108fn write_compressed<W: Write>(mut s: &str, mut w: W) -> io::Result<()> {
109    while let Some(p) = s.find(|c: char| c.is_ascii_whitespace()) {
110        let (prev, after) = s.split_at(p);
111        w.write_all(prev.as_bytes())?;
112        w.write_all(b" ")?;
113        if let Some(p) = after.find(non_whitespace) {
114            s = after.split_at(p).1;
115        } else {
116            s = "";
117        }
118    }
119    w.write_all(s.as_bytes())
120}
121
/// Writes `s` to `w` with HTML entities decoded.
///
/// Input without any `&` cannot contain an entity and is copied verbatim.
/// Actual decoding is not implemented yet: any input containing `&` panics
/// through `todo!()`.
fn write_decoded<W: Write>(s: &str, mut w: W) -> io::Result<()> {
    if s.contains('&') {
        todo!()
    }
    w.write_all(s.as_bytes())
}
129
130fn write_valid_asset<W: Write>(mut s: &str, mut w: W, asset: &str) -> io::Result<()> {
131    write!(w, "_{}_", asset)?;
132    while let Some(n) = s.find(not_js_identifier) {
133        let (prev, next) = s.split_at(n);
134        write!(w, "{}", prev)?;
135        let c = next.chars().next().unwrap();
136        if c == '-' {
137            write!(w, "_")?;
138        } else {
139            write!(w, "{}", c as u32)?;
140        }
141        s = &next[c.len_utf8()..];
142    }
143    write!(w, "{}", s)?;
144    Ok(())
145}
146
147impl StrOps {
148    // ideally it should be str.satisfy(op) but adding a trait
149    // to str is too much. Use passive voice.
150    fn is_satisfied_by(&self, s: &str) -> bool {
151        todo!()
152    }
153    fn write_ops<W: Write>(&self, s: &str, mut w: W) -> io::Result<()> {
154        let flag_count = self.bits().count_ones();
155        if flag_count == 0 {
156            return w.write_all(s.as_bytes());
157        }
158        if flag_count == 1 {
159            return Self::write_one_op(*self, s, w);
160        }
161        let mut src = s;
162        let mut temp = vec![];
163        let mut dest = vec![];
164        for op in self.iter() {
165            Self::write_one_op(op, src, &mut dest)?;
166            std::mem::swap(&mut temp, &mut dest);
167            dest.clear();
168            src = std::str::from_utf8(&temp).expect("must be valid string");
169        }
170        w.write_all(src.as_bytes())
171    }
172    fn write_one_op<W: Write>(op: Self, s: &str, mut w: W) -> io::Result<()> {
173        debug_assert!(op.bits().count_ones() == 1);
174        match op {
175            StrOps::COMPRESS_WHITESPACE => write_compressed(s, w),
176            StrOps::DECODE_ENTITY => write_decoded(s, w),
177            StrOps::JS_STRING => write_json_string(s, &mut w),
178            StrOps::CAMEL_CASE => write_camelized(s, w),
179            StrOps::CAPITALIZED => write_capitalized(s, w),
180            StrOps::VALID_DIR => write_valid_asset(s, w, "directive"),
181            StrOps::VALID_COMP => write_valid_asset(s, w, "component"),
182            StrOps::IS_ATTR => w.write_all(s.as_bytes()), // NOOP
183            StrOps::SELF_SUFFIX => {
184                // noop, just a marker
185                w.write_all(s.as_bytes())
186            }
187            StrOps::V_DIR_PREFIX => {
188                w.write_all(b"v-")?;
189                w.write_all(s.as_bytes())
190            }
191            StrOps::CTX_PREFIX => {
192                w.write_all(b"_ctx.")?;
193                w.write_all(s.as_bytes())
194            }
195            _ => todo!("{:?} not implemented", op),
196        }
197    }
198    fn iter(&self) -> StrOpIter {
199        StrOpIter(*self)
200    }
201}
202
203struct StrOpIter(StrOps);
204impl Iterator for StrOpIter {
205    type Item = StrOps;
206    fn next(&mut self) -> Option<Self::Item> {
207        let ops = &mut self.0;
208        if ops.is_empty() {
209            None
210        } else {
211            let bits = 1 << ops.bits().trailing_zeros();
212            let r = StrOps { bits };
213            ops.remove(r);
214            Some(r)
215        }
216    }
217    fn size_hint(&self) -> (usize, Option<usize>) {
218        let bits = self.0.bits().count_ones() as usize;
219        (bits, Some(bits))
220    }
221}
222
223impl ExactSizeIterator for StrOpIter {}
224
225/// A str for Vue compiler's internal modification.
226/// Instead of returning a Cow<str>, StrOp is recorded in the VStr
227/// and will be processed later in codegen phase.
228#[derive(Clone, Copy, PartialEq, Eq, Hash, Default)]
229pub struct VStr<'a> {
230    pub raw: &'a str,
231    pub ops: StrOps,
232}
233
234impl<'a> VStr<'a> {
235    // adjective and is_xx for static method
236    pub fn raw(raw: &'a str) -> Self {
237        Self {
238            raw,
239            ops: StrOps::empty(),
240        }
241    }
242    pub fn is_handler(s: &VStr) -> bool {
243        if s.ops.contains(StrOps::HANDLER_KEY) {
244            return true;
245        }
246        is_event_prop(s.raw)
247    }
248    pub fn is_self_suffixed(s: &VStr) -> bool {
249        s.ops.contains(StrOps::SELF_SUFFIX)
250    }
251    pub fn is_asset(s: &VStr) -> bool {
252        s.ops.intersects(StrOps::ASSET_OPS)
253    }
254    pub fn is_ctx_prefixed(s: &VStr) -> bool {
255        s.ops.contains(StrOps::CTX_PREFIX)
256    }
257}
258impl<'a> VStr<'a> {
259    // verb is instance method
260    pub fn decode(&mut self, is_attr: bool) -> &mut Self {
261        let ops = if is_attr {
262            StrOps::DECODE_ENTITY | StrOps::IS_ATTR
263        } else {
264            StrOps::DECODE_ENTITY
265        };
266        self.ops |= ops;
267        self
268    }
269    pub fn camelize(&mut self) -> &mut Self {
270        self.ops |= StrOps::CAMEL_CASE;
271        self
272    }
273    pub fn capitalize(&mut self) -> &mut Self {
274        self.ops |= StrOps::CAPITALIZED;
275        self
276    }
277    pub fn pascalize(&mut self) -> &mut Self {
278        self.camelize().capitalize()
279    }
280    pub fn compress_whitespace(&mut self) -> &mut Self {
281        self.ops |= StrOps::COMPRESS_WHITESPACE;
282        self
283    }
284    /// convert v-on arg to handler key: click -> onClick
285    pub fn be_handler(&mut self) -> &mut Self {
286        self.ops |= StrOps::HANDLER_KEY;
287        self
288    }
289    /// add __self suffix for self referring component
290    pub fn suffix_self(&mut self) -> &mut Self {
291        self.ops |= StrOps::SELF_SUFFIX;
292        self
293    }
294    /// convert into a valid asset id
295    pub fn be_component(&mut self) -> &mut Self {
296        self.ops |= StrOps::VALID_COMP;
297        self
298    }
299    pub fn unbe_component(&mut self) -> &mut Self {
300        self.ops.remove(StrOps::VALID_COMP);
301        self
302    }
303    pub fn be_directive(&mut self) -> &mut Self {
304        self.ops |= StrOps::VALID_DIR;
305        self
306    }
307    pub fn unbe_directive(&mut self) -> &mut Self {
308        self.ops.remove(StrOps::VALID_DIR);
309        self
310    }
311    /// convert into a valid asset id
312    pub fn prefix_v_dir(&mut self) -> &mut Self {
313        self.ops |= StrOps::V_DIR_PREFIX;
314        self
315    }
316    pub fn be_js_str(&mut self) -> &mut Self {
317        self.ops |= StrOps::JS_STRING;
318        self
319    }
320    pub fn prefix_ctx(&mut self) -> &mut Self {
321        self.ops |= StrOps::CTX_PREFIX;
322        self
323    }
324    pub fn into_string(self) -> String {
325        let mut ret = vec![];
326        self.write_to(&mut ret).expect("string should never fail");
327        String::from_utf8(ret).expect("vstr should write valid utf8")
328    }
329
330    pub fn write_to<W: Write>(&self, w: W) -> io::Result<()> {
331        self.ops.write_ops(self.raw, w)
332    }
333}
334
335impl<'a> Deref for VStr<'a> {
336    type Target = str;
337    fn deref(&self) -> &Self::Target {
338        self.raw
339    }
340}
341
342impl<'a> From<&'a str> for VStr<'a> {
343    fn from(s: &'a str) -> Self {
344        VStr::raw(s)
345    }
346}
347
/// Serializes the fully rendered string, i.e. with all recorded ops applied.
#[cfg(feature = "serde")]
impl<'a> serde::Serialize for VStr<'a> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        // `VStr` is `Copy`, so consuming `into_string` works on a copy
        serializer.serialize_str(&self.into_string())
    }
}
358
#[cfg(test)]
mod test {
    use super::*;

    /// Renders `s` with `ops` applied into an owned string.
    fn write_string(ops: StrOps, s: &str) -> String {
        let mut buf = Vec::new();
        ops.write_ops(s, &mut buf).unwrap();
        String::from_utf8(buf).unwrap()
    }

    // VStr is passed around by value, keep it small
    #[test]
    fn test_v_str_size() {
        assert_eq!(std::mem::size_of::<VStr>(), 24);
    }

    // TODO: proptest can test invariant
    #[test]
    fn test_str_ops_iter() {
        // every flag is yielded exactly once, one bit at a time
        let all = StrOps::all();
        let single_ops: Vec<_> = all.iter().collect();
        assert_eq!(single_ops.len() as u32, all.bits().count_ones());
        assert!(single_ops.iter().all(|op| op.bits().count_ones() == 1));

        // the empty set yields nothing
        let none = StrOps::empty();
        assert_eq!(none.iter().count(), 0);

        // flags come out from the lowest bit to the highest one
        let pair = StrOps::V_DIR_PREFIX | StrOps::VALID_COMP;
        let ordered: Vec<_> = pair.iter().collect();
        assert_eq!(ordered[0], StrOps::VALID_COMP);
        assert_eq!(ordered[1], StrOps::V_DIR_PREFIX);
        assert_eq!(ordered.len(), 2);
    }

    #[test]
    fn test_str_ops_write() {
        let src = "test";
        let cases = [
            (StrOps::empty(), "test"),
            (StrOps::V_DIR_PREFIX, "v-test"),
            (StrOps::V_DIR_PREFIX, "v-test"),
            (StrOps::SELF_SUFFIX, "test"),
            (StrOps::JS_STRING, stringify!("test")),
            (StrOps::CAMEL_CASE | StrOps::V_DIR_PREFIX, "vTest"),
        ];
        for &(ops, expect) in cases.iter() {
            let origin = ops;
            assert_eq!(write_string(ops, src), expect);
            // rendering must not consume or alter the flags
            assert_eq!(ops, origin);
        }
    }

    #[test]
    fn test_str_ops_write_edge() {
        let cases = [
            ("å—化ã‘", StrOps::empty(), "å—化ã‘"),
            ("å—化ã‘", StrOps::JS_STRING, stringify!("å—化ã‘")),
            ("foo-bar", StrOps::CAMEL_CASE, "fooBar"),
            ("foo-bar", StrOps::CAPITALIZED, "Foo-bar"),
            ("", StrOps::CAPITALIZED, ""),
            ("ālaya-vijñāna", StrOps::CAMEL_CASE, "ālayaVijñāna"),
            ("आलयविज्ञान", StrOps::CAMEL_CASE, "आलयविज्ञान"),
            ("ω", StrOps::CAPITALIZED, "Ω"),
            (
                "foo-bar",
                StrOps::CAPITALIZED | StrOps::CAMEL_CASE,
                "FooBar",
            ),
            ("-a-b-c", StrOps::CAMEL_CASE, "ABC"),
            ("a-a-b-c", StrOps::CAMEL_CASE, "aABC"),
            ("a--b", StrOps::CAMEL_CASE, "a-B"),
            ("a--b", StrOps::VALID_COMP, "_component_a__b"),
            ("aいろは", StrOps::VALID_COMP, "_component_aいろは"),
            ("a^_^", StrOps::VALID_COMP, "_component_a94_94"),
            ("a--b", StrOps::VALID_DIR, "_directive_a__b"),
            ("a--", StrOps::VALID_DIR, "_directive_a__"),
        ];
        for &(src, ops, expect) in cases.iter() {
            let origin = ops;
            assert_eq!(write_string(ops, src), expect);
            // rendering must not consume or alter the flags
            assert_eq!(ops, origin);
        }
    }
}