vize_atelier_core 0.218.0

Atelier Core - The core workshop for Vize Vue template parsing, transform lanes, and code generation
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
//! Source Map v3 emission for generated render code.
//!
//! The codegen writes JavaScript into a single growing byte buffer. While that
//! buffer is being filled, [`SourceMapBuilder`] records *segments*: a generated
//! byte offset paired with the byte offset of the originating node in the
//! template source. Once emission finishes, [`SourceMapBuilder::finish`]
//! resolves both byte offsets to (line, column) — the generated side against the
//! final code buffer, the source side against the original template — and
//! serializes a standard [Source Map v3] document with base64 [VLQ]-encoded
//! `mappings`.
//!
//! Both sides are resolved from byte offsets rather than from the AST's
//! `line`/`column` fields: the parser does not track line breaks for node
//! positions (it reports everything on line 1), so its `column` is effectively a
//! global offset and its `line` is unusable for multi-line templates. The byte
//! `offset`, by contrast, is exact. Computing line/column here from the offsets
//! keeps the map correct regardless of that parser limitation and is the
//! conventional approach for source-map generators anyway.
//!
//! This is intentionally additive: recording a segment never writes to the code
//! buffer, so the generated `code` string is byte-identical whether or not the
//! `source_map` flag is enabled. The map is only assembled when the flag is on.
//!
//! Coverage spans the highest-value anchors — dynamic expressions (identifiers,
//! member expressions, event handlers, directive expressions), element tag
//! names, static attribute names and values, text/comment content, and
//! object-literal prop keys. Identifier-shaped anchors that carry a known source
//! symbol (a prop key, a static attribute name) additionally populate the v3
//! [`names`] array and the per-segment name index, so a consumer can recover the
//! original symbol. Mapping fidelity can still be widened incrementally; see the
//! crate-level codegen docs and the `#1533` tracking issue.
//!
//! [Source Map v3]: https://tc39.es/ecma426/
//! [`names`]: https://tc39.es/ecma426/#json-names
//! [VLQ]: https://en.wikipedia.org/wiki/Variable-length_quantity

use vize_carton::FxHashMap;
use vize_carton::String;
use vize_carton::ToCompactString;

/// A recorded mapping anchor, as raw byte offsets.
///
/// `generated_offset` is a byte offset into the code buffer captured at the
/// moment the mapped token was about to be written; `source_offset` is the byte
/// offset of the originating node in the original template source. Both are
/// resolved to (line, column) at [`SourceMapBuilder::finish`]. `name` is an
/// index into the builder's `names` table when the anchor carries a known
/// source symbol (a prop key, a static attribute name), or `None` for anonymous
/// anchors; it becomes the optional 5th VLQ field of the segment.
#[derive(Debug, Clone, Copy)]
struct Segment {
    /// Byte offset into the generated code buffer where the mapped token starts.
    generated_offset: u32,
    /// Byte offset of the originating node in the original template source.
    source_offset: u32,
    /// Index into the builder's `names` table, or `None` for an anonymous anchor.
    name: Option<u32>,
}

/// Accumulates source-map segments during codegen and serializes a v3 map.
///
/// Lives in the codegen context as an `Option`; it is `Some` only when the
/// `source_map` codegen flag is enabled, so the no-map path pays nothing.
#[derive(Debug, Default)]
pub(crate) struct SourceMapBuilder {
    /// Recorded anchors, in the order they were added. They are sorted by
    /// generated offset only when the map is serialized in `finish`.
    segments: Vec<Segment>,
    /// The v3 `names` array, in insertion order. A segment's `name` is an index
    /// into this vector.
    names: Vec<String>,
    /// Interns symbol names to their index in `names`, so repeated symbols
    /// (e.g. an `id` attribute on many elements) share one `names` entry.
    name_index: FxHashMap<String, u32>,
}

impl SourceMapBuilder {
    /// Create an empty builder.
    pub(crate) fn new() -> Self {
        Self::default()
    }

    /// Record a mapping from the current generated byte offset to a source byte
    /// offset.
    ///
    /// Call this *immediately before* writing the mapped token to the code
    /// buffer, passing the buffer's current length as `generated_offset` and the
    /// originating AST node's `loc.start.offset` as `source_offset`.
    ///
    /// A `generated_offset` that does not fit in `u32` is saturated to
    /// `u32::MAX` rather than wrapped, so it still sorts after every smaller
    /// offset.
    pub(crate) fn add_raw(&mut self, generated_offset: usize, source_offset: u32) {
        self.push_segment(generated_offset, source_offset, None);
    }

    /// Record a mapping that additionally carries a known source symbol `name`.
    ///
    /// The name is interned into the v3 `names` array (deduplicated across the
    /// whole map) and the segment references it by index, so a consumer can map
    /// the generated token back to the original identifier. Otherwise identical
    /// to [`add_raw`](Self::add_raw): call it immediately before writing the
    /// mapped token.
    pub(crate) fn add_named(&mut self, generated_offset: usize, source_offset: u32, name: &str) {
        let index = self.intern_name(name);
        self.push_segment(generated_offset, source_offset, Some(index));
    }

    /// Append one segment; shared by [`add_raw`](Self::add_raw) and
    /// [`add_named`](Self::add_named) so both narrow the offset the same way.
    fn push_segment(&mut self, generated_offset: usize, source_offset: u32, name: Option<u32>) {
        self.segments.push(Segment {
            generated_offset: Self::saturating_u32(generated_offset),
            source_offset,
            name,
        });
    }

    /// Narrow a `usize` to `u32`, clamping to `u32::MAX` instead of wrapping.
    ///
    /// A plain `as u32` cast would silently wrap a value above `u32::MAX` to a
    /// small number, which would reorder the segment during the sort in
    /// `finish` and point the mapping at an unrelated position.
    fn saturating_u32(value: usize) -> u32 {
        // The `min` guarantees the value fits, so the final cast is lossless.
        value.min(u32::MAX as usize) as u32
    }

    /// Return the `names`-array index for `name`, inserting it if new.
    fn intern_name(&mut self, name: &str) -> u32 {
        if let Some(&index) = self.name_index.get(name) {
            return index;
        }
        // The new entry lands at the current end of `names`.
        let index = Self::saturating_u32(self.names.len());
        self.names.push(name.to_compact_string());
        self.name_index.insert(name.to_compact_string(), index);
        index
    }

    /// Serialize the accumulated segments into a Source Map v3 JSON string.
    ///
    /// `generated_code` is the final code buffer the segments index into;
    /// generated line/column for each segment is derived from it. `filename` is
    /// the original source path recorded in `sources`, and `source_content` is
    /// the original template text embedded as `sourcesContent` (so consumers can
    /// resolve mappings without a second fetch).
    ///
    /// Mappings are grouped by generated line and VLQ-encoded per the v3 spec.
    /// Each segment encodes 4 fields: generated column delta, source index
    /// delta, source line delta, source column delta (all relative to the
    /// previous segment, with source index/line/column relative across the whole
    /// document and generated column reset per line). Segments that carry a
    /// source symbol add a 5th field: the name-index delta (relative to the
    /// previous named segment across the whole document).
    ///
    /// Returns the serialized JSON document. If serialization were to fail, an
    /// empty string is returned instead of an error.
    pub(crate) fn finish(
        mut self,
        generated_code: &str,
        filename: &str,
        source_content: &str,
    ) -> String {
        // Resolve byte offsets to (line, col). Sort by generated offset first so
        // the generated-side scan cursor advances monotonically; the source side
        // is resolved with an independent binary search over precomputed line
        // starts, so it does not require the segments to be source-sorted.
        // The sort is stable, so anchors recorded at the same generated offset
        // keep their insertion order.
        self.segments.sort_by_key(|s| s.generated_offset);

        let resolved = resolve_positions(generated_code, source_content, &self.segments);
        let mappings = encode_mappings(&resolved);

        // Use serde_json to build the document so `filename`/source content and
        // the `names` entries are correctly JSON-escaped (quotes, control chars,
        // unicode).
        // NOTE(review): the v3 spec describes `file` as the name of the
        // *generated* file, while this passes the source `filename` for both
        // `file` and `sources` — confirm that is intended.
        let names: std::vec::Vec<&str> = self.names.iter().map(String::as_str).collect();
        let doc = serde_json::json!({
            "version": 3,
            "file": filename,
            "sources": [filename],
            "sourcesContent": [source_content],
            "names": names,
            "mappings": mappings.as_str(),
        });

        // serde_json writes into a std String; convert once to the workspace
        // CompactString that `CodegenResult.map` stores.
        serde_json::to_string(&doc)
            .unwrap_or_default()
            .to_compact_string()
    }
}

/// A segment whose endpoints have been resolved to 0-indexed line/column.
///
/// Produced by `resolve_positions` and consumed by `encode_mappings`.
#[derive(Debug, Clone, Copy)]
struct ResolvedSegment {
    /// 0-indexed line in the generated code.
    generated_line: u32,
    /// 0-indexed column in the generated code, in UTF-16 code units.
    generated_column: u32,
    /// 0-indexed line in the original template source.
    source_line: u32,
    /// 0-indexed column in the original template source, in UTF-16 code units.
    source_column: u32,
    /// Index into the v3 `names` array when this anchor carries a known source
    /// symbol, carried through unchanged from the recorded segment.
    name: Option<u32>,
}

/// Turn each segment's raw byte offsets into 0-indexed (line, column) pairs on
/// both the generated and the source side.
///
/// Generated positions come from one forward pass over `code`: the scan state
/// persists between segments, which is why the caller must hand in `segments`
/// sorted by generated offset. Source positions are looked up independently
/// through a line-start table and binary search, so source order is
/// irrelevant. Offsets past the end of either text are clamped to its length.
///
/// Columns are measured in UTF-16 code units, the unit source-map consumers
/// (browsers, the `source-map` npm package) work in. For pure-ASCII lines that
/// is the same as the byte column; it differs only when non-ASCII text
/// precedes the anchor on its line.
fn resolve_positions(code: &str, source: &str, segments: &[Segment]) -> Vec<ResolvedSegment> {
    let source_line_starts = line_start_table(source);
    let generated = code.as_bytes();

    // Forward-scan state over the generated code, shared across segments.
    let mut scanned = 0usize; // bytes of `generated` already inspected
    let mut line = 0u32; // line the scan is currently on
    let mut line_begin = 0usize; // byte offset where that line starts

    segments
        .iter()
        .map(|seg| {
            // Generated side: consume bytes up to this anchor, counting newlines.
            let stop = generated.len().min(seg.generated_offset as usize);
            if scanned < stop {
                for (rel, &byte) in generated[scanned..stop].iter().enumerate() {
                    if byte == b'\n' {
                        line += 1;
                        line_begin = scanned + rel + 1;
                    }
                }
                scanned = stop;
            }
            let generated_column = utf16_len(&code[line_begin..stop]);

            // Source side: independent lookup in the line-start table.
            let (source_line, source_column) =
                resolve_in_table(source, &source_line_starts, seg.source_offset as usize);

            ResolvedSegment {
                generated_line: line,
                generated_column,
                source_line,
                source_column,
                name: seg.name,
            }
        })
        .collect()
}

/// Build the table of byte offsets at which each line of `text` starts.
///
/// The first entry is always `0`; each `\n` contributes the offset of the byte
/// right after it, so the table is strictly increasing and has one entry per
/// line (a trailing newline yields a final, empty line).
fn line_start_table(text: &str) -> Vec<usize> {
    let mut table = Vec::with_capacity(16);
    table.push(0);
    // `\n` is a single byte in UTF-8, so the line after it begins one byte past
    // the match position.
    table.extend(text.match_indices('\n').map(|(newline_at, _)| newline_at + 1));
    table
}

/// Map a byte `offset` in `text` to a 0-indexed (line, column) pair, using the
/// precomputed `line_starts` table for `text`.
///
/// An `offset` beyond the end of `text` is clamped to `text.len()`. The column
/// is measured in UTF-16 code units from the start of the line.
fn resolve_in_table(text: &str, line_starts: &[usize], offset: usize) -> (u32, u32) {
    let clamped = text.len().min(offset);
    // An exact hit means `clamped` is itself a line start. Otherwise the search
    // reports where it would be inserted, and the containing line is the entry
    // just before that; the insertion point is >= 1 because the table begins
    // with 0.
    let line = line_starts
        .binary_search(&clamped)
        .unwrap_or_else(|insert_at| insert_at - 1);
    let column = utf16_len(&text[line_starts[line]..clamped]);
    (line as u32, column)
}

/// Number of UTF-16 code units needed to encode `s`.
///
/// Characters in the Basic Multilingual Plane count as 1; anything above
/// U+FFFF needs a surrogate pair and counts as 2.
#[inline]
fn utf16_len(s: &str) -> u32 {
    s.chars().map(|ch| ch.len_utf16() as u32).sum()
}

/// Serialize resolved segments into the v3 `mappings` string.
///
/// Generated lines are separated by `;` and segments on the same line by `,`.
/// Every segment writes four VLQ deltas — generated column, source index,
/// source line, source column — and a fifth, the name index, only when the
/// segment carries a source symbol. The generated column is relative to the
/// previous segment on the same line and restarts from zero on each new line;
/// all other fields are relative to the previous segment that wrote them,
/// across the whole document.
fn encode_mappings(segments: &[ResolvedSegment]) -> String {
    let mut out = String::with_capacity(segments.len() * 6);

    // Values the next segment's deltas are measured against.
    let mut line = 0u32;
    let mut needs_comma = false;
    let mut last_gen_col = 0i64;
    let mut last_src_line = 0i64;
    let mut last_src_col = 0i64;
    let mut last_name = 0i64;

    for seg in segments {
        // Moving to a later generated line: one `;` per line crossed, and the
        // generated-column baseline restarts.
        if line < seg.generated_line {
            for _ in line..seg.generated_line {
                out.push(';');
            }
            line = seg.generated_line;
            last_gen_col = 0;
            needs_comma = false;
        }

        if needs_comma {
            out.push(',');
        }
        needs_comma = true;

        let gen_col = i64::from(seg.generated_column);
        let src_line = i64::from(seg.source_line);
        let src_col = i64::from(seg.source_column);

        let deltas = [
            gen_col - last_gen_col,
            0, // source index: there is only one source, so the delta is always 0
            src_line - last_src_line,
            src_col - last_src_col,
        ];
        for &delta in &deltas {
            encode_vlq(&mut out, delta);
        }

        // Optional 5th field. Its baseline moves only when a name is written,
        // i.e. it is a delta against the previous *named* segment.
        if let Some(name) = seg.name {
            let name_index = i64::from(name);
            encode_vlq(&mut out, name_index - last_name);
            last_name = name_index;
        }

        last_gen_col = gen_col;
        last_src_line = src_line;
        last_src_col = src_col;
    }

    out
}

/// The standard base64 alphabet, indexed by 6-bit digit value.
const BASE64_CHARS: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/// Append the base64 VLQ encoding of the signed integer `value` to `out`.
///
/// The sign is stored in the lowest bit and the magnitude above it. The result
/// is emitted least-significant group first, 5 payload bits per base64 digit,
/// with the 6th bit of a digit set when more digits follow — the encoding used
/// by Source Map v3 `mappings`.
fn encode_vlq(out: &mut String, value: i64) {
    // Move the sign into bit 0 and the magnitude into the bits above it.
    let sign_bit = u64::from(value < 0);
    let magnitude = if value < 0 {
        (-value) as u64
    } else {
        value as u64
    };
    let mut remaining = (magnitude << 1) | sign_bit;

    loop {
        let group = (remaining & 0b1_1111) as usize;
        remaining >>= 5;
        let more_follow = remaining != 0;
        // The 6th bit flags that another digit follows.
        let digit = if more_follow { group | 0b10_0000 } else { group };
        out.push(char::from(BASE64_CHARS[digit]));
        if !more_follow {
            break;
        }
    }
}

/// Unit tests for the VLQ encoder, position resolution, and the serialized v3
/// document.
#[cfg(test)]
mod tests {
    use super::*;

    /// Decode a single VLQ-encoded value from `chars`, returning the value and
    /// the number of base64 digits consumed. Test-only mirror of the encoder so
    /// the round trip can be asserted without an external dependency.
    fn decode_vlq(chars: &[u8]) -> (i64, usize) {
        let mut result: u64 = 0;
        let mut shift = 0u32;
        let mut consumed = 0usize;
        for &c in chars {
            // Panics on a byte outside the base64 alphabet, which is a test bug.
            let digit = BASE64_CHARS.iter().position(|&b| b == c).unwrap() as u64;
            consumed += 1;
            let has_continuation = (digit & 0b10_0000) != 0;
            result |= (digit & 0b1_1111) << shift;
            shift += 5;
            if !has_continuation {
                break;
            }
        }
        // Bit 0 carries the sign; the magnitude sits in the bits above it.
        let negative = (result & 1) != 0;
        let magnitude = (result >> 1) as i64;
        (if negative { -magnitude } else { magnitude }, consumed)
    }

    /// Encode `value`, decode it again, and assert that the value survives and
    /// that the decoder consumed exactly the digits the encoder produced.
    fn roundtrip(value: i64) {
        let mut s = String::default();
        encode_vlq(&mut s, value);
        let (decoded, consumed) = decode_vlq(s.as_bytes());
        assert_eq!(decoded, value, "VLQ roundtrip failed for {value}");
        assert_eq!(consumed, s.len(), "consumed != encoded len for {value}");
    }

    /// Round-trips zero, both signs, and values on either side of the
    /// single-digit/multi-digit boundary (15 vs. 16).
    #[test]
    fn vlq_roundtrip_basic() {
        for v in [
            0, 1, -1, 15, 16, -16, 17, -17, 1000, -1000, 123_456, -123_456,
        ] {
            roundtrip(v);
        }
    }

    /// Pins the exact base64 output for a few small values.
    #[test]
    fn vlq_known_encodings() {
        // Reference values from the Source Map v3 spec / mozilla source-map.
        let mut s = String::default();
        encode_vlq(&mut s, 0);
        assert_eq!(s.as_str(), "A");
        s = String::default();
        encode_vlq(&mut s, 1);
        assert_eq!(s.as_str(), "C");
        s = String::default();
        encode_vlq(&mut s, -1);
        assert_eq!(s.as_str(), "D");
        s = String::default();
        encode_vlq(&mut s, 16);
        assert_eq!(s.as_str(), "gB");
    }

    /// Columns are UTF-16 code units: BMP characters count once, characters
    /// above U+FFFF count twice.
    #[test]
    fn utf16_len_counts_surrogates() {
        assert_eq!(utf16_len("abc"), 3);
        assert_eq!(utf16_len("é"), 1); // U+00E9, single unit
        assert_eq!(utf16_len("𝟘"), 2); // U+1D7D8, surrogate pair
    }

    /// The generated-side scan tracks newlines and reports the column relative
    /// to the start of the anchor's line.
    #[test]
    fn resolve_generated_side_multiline() {
        let code = "line0\nlinX1\nli";
        // 'X' at byte 9 (gen line 1, col 3); 'l' of "li" at byte 12 (gen line 2, col 0)
        let segs = [
            Segment {
                generated_offset: 9,
                source_offset: 0,
                name: None,
            },
            Segment {
                generated_offset: 12,
                source_offset: 0,
                name: None,
            },
        ];
        let resolved = resolve_positions(code, "", &segs);
        assert_eq!(
            (resolved[0].generated_line, resolved[0].generated_column),
            (1, 3)
        );
        assert_eq!(
            (resolved[1].generated_line, resolved[1].generated_column),
            (2, 0)
        );
    }

    /// The source-side lookup resolves offsets in the middle of a line and
    /// offsets that fall exactly on a line start.
    #[test]
    fn resolve_source_offset_to_line_column() {
        // "<div>\n  {{ msg }}\n</div>": line 1 starts at byte 6, so `msg` at byte
        // offset 11 is on line 1 (0-indexed), column 5 (`  {{ ` precedes it).
        let source = "<div>\n  {{ msg }}\n</div>";
        let starts = line_start_table(source);
        assert_eq!(resolve_in_table(source, &starts, 11), (1, 5));
        // The `<` at offset 0 is line 0, column 0.
        assert_eq!(resolve_in_table(source, &starts, 0), (0, 0));
        // The `<` of `</div>` is at offset 18, line 2, column 0.
        assert_eq!(resolve_in_table(source, &starts, 18), (2, 0));
    }

    /// End-to-end: a single anonymous anchor yields a parseable v3 document
    /// whose first segment decodes to the expected four fields.
    #[test]
    fn finish_produces_valid_v3_doc() {
        let mut b = SourceMapBuilder::new();
        // generated `_ctx.msg` at offset 0; source `msg` at byte offset 8.
        b.add_raw(0, 8);
        let code = "_ctx.msg";
        let json = b.finish(code, "template.vue", "<div>{{ msg }}</div>");
        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed["version"], 3);
        assert_eq!(parsed["sources"][0], "template.vue");
        assert_eq!(parsed["sourcesContent"][0], "<div>{{ msg }}</div>");
        // First segment of first line: gen col 0, src 0, src line 0, src col 8.
        let mappings = parsed["mappings"].as_str().unwrap();
        let (gen_col, c1) = decode_vlq(mappings.as_bytes());
        let (src_idx, c2) = decode_vlq(&mappings.as_bytes()[c1..]);
        let (src_line, c3) = decode_vlq(&mappings.as_bytes()[c1 + c2..]);
        let (src_col, _) = decode_vlq(&mappings.as_bytes()[c1 + c2 + c3..]);
        assert_eq!((gen_col, src_idx, src_line, src_col), (0, 0, 0, 8));
    }

    /// A named anchor adds its symbol to `names` and emits a 5-field segment.
    #[test]
    fn named_segment_populates_names_and_fifth_field() {
        let mut b = SourceMapBuilder::new();
        // Generated prop key `id` at offset 0; source `id` at byte offset 5.
        b.add_named(0, 5, "id");
        let code = "id: \"app\"";
        let json = b.finish(code, "Foo.vue", r#"<div id="app">"#);
        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();

        // The symbol is recorded in `names`.
        assert_eq!(parsed["names"][0], "id");

        // The single segment carries all 5 VLQ fields, the last being the
        // name-index delta (0, i.e. the first `names` entry).
        let mappings = parsed["mappings"].as_str().unwrap();
        let bytes = mappings.as_bytes();
        let (gen_col, c1) = decode_vlq(bytes);
        let (src_idx, c2) = decode_vlq(&bytes[c1..]);
        let (src_line, c3) = decode_vlq(&bytes[c1 + c2..]);
        let (src_col, c4) = decode_vlq(&bytes[c1 + c2 + c3..]);
        let consumed = c1 + c2 + c3 + c4;
        assert!(consumed < bytes.len(), "a 5th VLQ field must be present");
        let (name_idx, c5) = decode_vlq(&bytes[consumed..]);
        assert_eq!(
            (gen_col, src_idx, src_line, src_col, name_idx),
            (0, 0, 0, 5, 0)
        );
        assert_eq!(consumed + c5, bytes.len(), "no trailing bytes after name");
    }

    /// Repeated symbols share one `names` entry, and entries keep first-seen
    /// order.
    #[test]
    fn intern_name_deduplicates() {
        let mut b = SourceMapBuilder::new();
        // Same symbol on two anchors interns to one `names` entry.
        b.add_named(0, 0, "id");
        b.add_named(10, 4, "id");
        b.add_named(20, 8, "class");
        let json = b.finish("0123456789012345678901234", "Foo.vue", "");
        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
        let names = parsed["names"].as_array().unwrap();
        assert_eq!(names.len(), 2, "`id` should be deduplicated");
        assert_eq!(names[0], "id");
        assert_eq!(names[1], "class");
    }
}