Skip to main content

sqruff_lib_core/
templaters.rs

1use std::cmp::Ordering;
2use std::ops::{Deref, Range};
3use std::sync::Arc;
4
5#[cfg(feature = "stringify")]
6use serde::{Deserialize, Serialize};
7
8use smol_str::SmolStr;
9
10use crate::errors::SQLFluffSkipFile;
11use crate::slice_helpers::zero_slice;
12
/// A slice referring to a templated file.
///
/// Each record pairs a range of the source string with the range of the
/// templated string it corresponds to, tagged with a slice type.
#[cfg_attr(feature = "stringify", derive(Serialize))]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TemplatedFileSlice {
    /// Kind of slice, e.g. `"literal"` or `"templated"`.
    pub slice_type: String,
    /// Range covered in the source string.
    pub source_slice: Range<usize>,
    /// Range covered in the templated string.
    pub templated_slice: Range<usize>,
}

impl TemplatedFileSlice {
    /// Build a slice record, copying `slice_type` into an owned `String`.
    pub fn new(
        slice_type: &str,
        source_slice: Range<usize>,
        templated_slice: Range<usize>,
    ) -> Self {
        let slice_type = String::from(slice_type);
        Self {
            slice_type,
            source_slice,
            templated_slice,
        }
    }
}
35
36/// A templated SQL file.
37///
38/// This is the response of a `templater`'s `.process()` method
39/// and contains both references to the original file and also
40/// the capability to split up that file when lexing.
41#[derive(Debug, PartialEq, Eq, Clone, Hash, Default)]
42pub struct TemplatedFile {
43    inner: Arc<TemplatedFileInner>,
44}
45
46impl TemplatedFile {
47    pub fn new(
48        source_str: String,
49        name: String,
50        input_templated_str: Option<String>,
51        sliced_file: Option<Vec<TemplatedFileSlice>>,
52        input_raw_sliced: Option<Vec<RawFileSlice>>,
53    ) -> Result<TemplatedFile, SQLFluffSkipFile> {
54        Ok(TemplatedFile {
55            inner: Arc::new(TemplatedFileInner::new(
56                source_str,
57                name,
58                input_templated_str,
59                sliced_file,
60                input_raw_sliced,
61            )?),
62        })
63    }
64
65    pub fn name(&self) -> &str {
66        &self.inner.name
67    }
68
69    #[cfg(feature = "stringify")]
70    pub fn to_yaml(&self) -> String {
71        let inner = &*self.inner;
72        serde_yaml::to_string(inner).unwrap()
73    }
74}
75
76impl From<String> for TemplatedFile {
77    fn from(raw: String) -> Self {
78        TemplatedFile {
79            inner: Arc::new(
80                TemplatedFileInner::new(raw, "<string>".to_string(), None, None, None).unwrap(),
81            ),
82        }
83    }
84}
85
86impl From<&str> for TemplatedFile {
87    fn from(raw: &str) -> Self {
88        TemplatedFile {
89            inner: Arc::new(
90                TemplatedFileInner::new(raw.to_string(), "<string>".to_string(), None, None, None)
91                    .unwrap(),
92            ),
93        }
94    }
95}
96
97impl Deref for TemplatedFile {
98    type Target = TemplatedFileInner;
99
100    fn deref(&self) -> &Self::Target {
101        &self.inner
102    }
103}
104
/// The data behind a [`TemplatedFile`]: the source text, the templated text
/// and the slice tables relating the two.
#[cfg_attr(feature = "stringify", derive(Serialize))]
#[derive(Debug, PartialEq, Eq, Clone, Hash, Default)]
pub struct TemplatedFileInner {
    /// The original source text.
    pub source_str: String,
    /// Name given at construction time.
    name: String,
    /// The templated text. `new` always stores `Some(..)`, falling back to a
    /// copy of `source_str` when no templated string is supplied; the derived
    /// `Default` leaves it `None`.
    pub templated_str: Option<String>,
    /// Byte offsets of every `'\n'` in `source_str`.
    source_newlines: Vec<usize>,
    /// Byte offsets of every `'\n'` in the templated text.
    templated_newlines: Vec<usize>,
    /// Raw slices of the source; `new` checks that they tile `source_str`
    /// exactly.
    raw_sliced: Vec<RawFileSlice>,
    /// Slices relating ranges of the source to ranges of the templated text.
    pub sliced_file: Vec<TemplatedFileSlice>,
}
116
117impl TemplatedFileInner {
118    /// Initialise the TemplatedFile.
119    /// If no templated_str is provided then we assume that
120    /// the file is NOT templated and that the templated view
121    /// is the same as the source view.
122    pub fn new(
123        source_str: String,
124        f_name: String,
125        input_templated_str: Option<String>,
126        sliced_file: Option<Vec<TemplatedFileSlice>>,
127        input_raw_sliced: Option<Vec<RawFileSlice>>,
128    ) -> Result<TemplatedFileInner, SQLFluffSkipFile> {
129        // Assume that no sliced_file, means the file is not templated.
130        // TODO Will this not always be Some and so type can avoid Option?
131        let templated_str = input_templated_str.clone().unwrap_or(source_str.clone());
132
133        let (sliced_file, raw_sliced): (Vec<TemplatedFileSlice>, Vec<RawFileSlice>) =
134            match sliced_file {
135                None => {
136                    if templated_str != source_str {
137                        panic!("Cannot instantiate a templated file unsliced!")
138                    } else if input_raw_sliced.is_some() {
139                        panic!("Templated file was not sliced, but not has raw slices.")
140                    } else {
141                        (
142                            vec![TemplatedFileSlice::new(
143                                "literal",
144                                0..source_str.len(),
145                                0..source_str.len(),
146                            )],
147                            vec![RawFileSlice::new(
148                                source_str.clone(),
149                                "literal".to_string(),
150                                0,
151                                None,
152                                None,
153                            )],
154                        )
155                    }
156                }
157                Some(sliced_file) => {
158                    if let Some(raw_sliced) = input_raw_sliced {
159                        (sliced_file, raw_sliced)
160                    } else {
161                        panic!("Templated file was sliced, but not raw.")
162                    }
163                }
164            };
165
166        // Precalculate newlines, character positions.
167        let source_newlines: Vec<usize> = iter_indices_of_newlines(source_str.as_str()).collect();
168        let templated_newlines: Vec<usize> =
169            iter_indices_of_newlines(templated_str.as_str()).collect();
170
171        // Consistency check raw string and slices.
172        let mut pos = 0;
173        for rfs in &raw_sliced {
174            if rfs.source_idx != pos {
175                panic!(
176                    "TemplatedFile. Consistency fail on running source length. {} != {}",
177                    pos, rfs.source_idx
178                )
179            }
180            pos += rfs.raw.len();
181        }
182        if pos != source_str.len() {
183            panic!(
184                "TemplatedFile. Consistency fail on final source length. {} != {}",
185                pos,
186                source_str.len()
187            )
188        }
189
190        // Consistency check templated string and slices.
191        let mut previous_slice: Option<&TemplatedFileSlice> = None;
192        let mut outer_tfs: Option<&TemplatedFileSlice> = None;
193        for tfs in &sliced_file {
194            match &previous_slice {
195                Some(previous_slice) => {
196                    if tfs.templated_slice.start != previous_slice.templated_slice.end {
197                        return Err(SQLFluffSkipFile::new(
198                            "Templated slices found to be non-contiguous.".to_string(),
199                        ));
200                        // TODO Make this nicer again
201                        // format!(
202                        //     "Templated slices found to be non-contiguous.
203                        // {:?} (starting {:?}) does not follow {:?} (starting
204                        // {:?})",
205                        //     tfs.templated_slice,
206                        //     templated_str[tfs.templated_slice],
207                        //     previous_slice.templated_slice,
208                        //     templated_str[previous_slice.templated_slice],
209                        // )
210                    }
211                }
212                None => {
213                    if tfs.templated_slice.start != 0 {
214                        return Err(SQLFluffSkipFile::new(format!(
215                            "First templated slice does not start at 0, (found slice {:?})",
216                            tfs.templated_slice
217                        )));
218                    }
219                }
220            }
221            previous_slice = Some(tfs);
222            outer_tfs = Some(tfs)
223        }
224        if !sliced_file.is_empty()
225            && input_templated_str.is_some()
226            && let Some(outer_tfs) = outer_tfs
227            && outer_tfs.templated_slice.end != templated_str.len()
228        {
229            return Err(SQLFluffSkipFile::new(format!(
230                "Last templated slice does not end at end of string, (found slice {:?})",
231                outer_tfs.templated_slice
232            )));
233        }
234
235        Ok(TemplatedFileInner {
236            raw_sliced,
237            source_newlines,
238            templated_newlines,
239            source_str: source_str.clone(),
240            sliced_file,
241            name: f_name,
242            templated_str: Some(templated_str),
243        })
244    }
245
246    /// Return true if there's a templated file.
247    pub fn is_templated(&self) -> bool {
248        self.templated_str.is_some()
249    }
250
251    /// Get the line number and position of a point in the source file.
252    /// Args:
253    ///  - char_pos: The character position in the relevant file.
254    ///  - source: Are we checking the source file (as opposed to the templated
255    ///    file)
256    ///
257    /// Returns: line_number, line_position
258    pub fn get_line_pos_of_char_pos(&self, char_pos: usize, source: bool) -> (usize, usize) {
259        let ref_str = if source {
260            &self.source_newlines
261        } else {
262            &self.templated_newlines
263        };
264        match ref_str.binary_search(&char_pos) {
265            Ok(nl_idx) | Err(nl_idx) => {
266                if nl_idx > 0 {
267                    (nl_idx + 1, char_pos - ref_str[nl_idx - 1])
268                } else {
269                    // NB: line_pos is char_pos + 1 because character position is 0-indexed,
270                    // but the line position is 1-indexed.
271                    (1, char_pos + 1)
272                }
273            }
274        }
275    }
276
277    /// Create TemplatedFile from a string.
278    pub fn from_string(raw: SmolStr) -> TemplatedFile {
279        // TODO: Might need to deal with this unwrap
280        TemplatedFile::new(raw.into(), "<string>".to_string(), None, None, None).unwrap()
281    }
282
283    /// Get templated string
284    pub fn templated(&self) -> &str {
285        self.templated_str.as_deref().unwrap()
286    }
287
288    pub fn source_only_slices(&self) -> Vec<RawFileSlice> {
289        let mut ret_buff = vec![];
290        for element in &self.raw_sliced {
291            if element.is_source_only_slice() {
292                ret_buff.push(element.clone());
293            }
294        }
295        ret_buff
296    }
297
298    /// Get all raw slices (template and literal).
299    pub fn raw_sliced(&self) -> &[RawFileSlice] {
300        &self.raw_sliced
301    }
302
303    pub fn find_slice_indices_of_templated_pos(
304        &self,
305        templated_pos: usize,
306        start_idx: Option<usize>,
307        inclusive: Option<bool>,
308    ) -> Option<(usize, usize)> {
309        let start_idx = start_idx.unwrap_or(0);
310        let inclusive = inclusive.unwrap_or(true);
311
312        let mut first_idx: Option<usize> = None;
313        let mut last_idx = start_idx;
314
315        // Work through the sliced file, starting at the start_idx if given
316        // as an optimisation hint. The sliced_file is a list of TemplatedFileSlice
317        // which reference parts of the templated file and where they exist in the
318        // source.
319        for (idx, elem) in self.sliced_file[start_idx..self.sliced_file.len()]
320            .iter()
321            .enumerate()
322        {
323            last_idx = idx + start_idx;
324            if elem.templated_slice.end >= templated_pos {
325                if first_idx.is_none() {
326                    first_idx = Some(idx + start_idx);
327                }
328
329                if elem.templated_slice.start > templated_pos
330                    || (!inclusive && elem.templated_slice.end >= templated_pos)
331                {
332                    break;
333                }
334            }
335        }
336
337        // If we got to the end add another index
338        if last_idx == self.sliced_file.len() - 1 {
339            last_idx += 1;
340        }
341
342        first_idx.map(|first_idx| (first_idx, last_idx))
343    }
344
345    /// Convert a template slice to a source slice.
346    pub fn templated_slice_to_source_slice(
347        &self,
348        template_slice: Range<usize>,
349    ) -> Result<Range<usize>, String> {
350        if self.sliced_file.is_empty() {
351            return Ok(template_slice);
352        }
353
354        let sliced_file = self.sliced_file.clone();
355
356        let (ts_start_sf_start, ts_start_sf_stop) = self
357            .find_slice_indices_of_templated_pos(template_slice.start, None, None)
358            .ok_or("Position not found in templated file")?;
359
360        let ts_start_subsliced_file = &sliced_file[ts_start_sf_start..ts_start_sf_stop];
361
362        // Work out the insertion point
363        let mut insertion_point: isize = -1;
364        for elem in ts_start_subsliced_file.iter() {
365            // Do slice starts and ends
366            for &slice_elem in ["start", "stop"].iter() {
367                let elem_val = match slice_elem {
368                    "start" => elem.templated_slice.start,
369                    "stop" => elem.templated_slice.end,
370                    _ => panic!("Unexpected slice_elem"),
371                };
372
373                if elem_val == template_slice.start {
374                    let point = if slice_elem == "start" {
375                        elem.source_slice.start
376                    } else {
377                        elem.source_slice.end
378                    };
379
380                    let point: isize = point.try_into().unwrap();
381                    if insertion_point < 0 || point < insertion_point {
382                        insertion_point = point;
383                    }
384                    // We don't break here, because we might find ANOTHER
385                    // later which is actually earlier.
386                }
387            }
388        }
389
390        // Zero length slice.
391        if template_slice.start == template_slice.end {
392            // Is it on a join?
393            return if insertion_point >= 0 {
394                Ok(zero_slice(insertion_point.try_into().unwrap()))
395                // It's within a segment.
396            } else if !ts_start_subsliced_file.is_empty()
397                && ts_start_subsliced_file[0].slice_type == "literal"
398            {
399                let offset =
400                    template_slice.start - ts_start_subsliced_file[0].templated_slice.start;
401                Ok(zero_slice(
402                    ts_start_subsliced_file[0].source_slice.start + offset,
403                ))
404            } else {
405                Err(format!(
406                    "Attempting a single length slice within a templated section! {template_slice:?} within \
407                     {ts_start_subsliced_file:?}."
408                ))
409            };
410        }
411
412        let (ts_stop_sf_start, ts_stop_sf_stop) = self
413            .find_slice_indices_of_templated_pos(template_slice.end, None, Some(false))
414            .ok_or("Position not found in templated file")?;
415
416        let mut ts_start_sf_start = ts_start_sf_start;
417        if insertion_point >= 0 {
418            for elem in &sliced_file[ts_start_sf_start..] {
419                let insertion_point: usize = insertion_point.try_into().unwrap();
420                if elem.source_slice.start != insertion_point {
421                    ts_start_sf_start += 1;
422                } else {
423                    break;
424                }
425            }
426        }
427
428        let subslices = &sliced_file[usize::min(ts_start_sf_start, ts_stop_sf_start)
429            ..usize::max(ts_start_sf_stop, ts_stop_sf_stop)];
430
431        let start_slices = if ts_start_sf_start == ts_start_sf_stop {
432            return match ts_start_sf_start.cmp(&sliced_file.len()) {
433                Ordering::Greater => {
434                    panic!("Starting position higher than sliced file position")
435                }
436                Ordering::Less => Ok(sliced_file[1].source_slice.clone()),
437                Ordering::Equal => Ok(sliced_file.last().unwrap().source_slice.clone()),
438            };
439        } else {
440            &sliced_file[ts_start_sf_start..ts_start_sf_stop]
441        };
442
443        let stop_slices = if ts_stop_sf_start == ts_stop_sf_stop {
444            vec![sliced_file[ts_stop_sf_start].clone()]
445        } else {
446            sliced_file[ts_stop_sf_start..ts_stop_sf_stop].to_vec()
447        };
448
449        let source_start: isize = if insertion_point >= 0 {
450            insertion_point
451        } else if start_slices[0].slice_type == "literal" {
452            let offset = template_slice.start - start_slices[0].templated_slice.start;
453            (start_slices[0].source_slice.start + offset)
454                .try_into()
455                .unwrap()
456        } else {
457            start_slices[0].source_slice.start.try_into().unwrap()
458        };
459
460        let source_stop = if stop_slices.last().unwrap().slice_type == "literal" {
461            let offset = stop_slices.last().unwrap().templated_slice.end - template_slice.end;
462            stop_slices.last().unwrap().source_slice.end - offset
463        } else {
464            stop_slices.last().unwrap().source_slice.end
465        };
466
467        let source_slice;
468        if source_start > source_stop.try_into().unwrap() {
469            let mut source_start = usize::MAX;
470            let mut source_stop = 0;
471            for elem in subslices {
472                source_start = usize::min(source_start, elem.source_slice.start);
473                source_stop = usize::max(source_stop, elem.source_slice.end);
474            }
475            source_slice = source_start..source_stop;
476        } else {
477            source_slice = source_start.try_into().unwrap()..source_stop;
478        }
479
480        Ok(source_slice)
481    }
482
483    ///  Work out whether a slice of the source file is a literal or not.
484    pub fn is_source_slice_literal(&self, source_slice: &Range<usize>) -> bool {
485        // No sliced file? Everything is literal
486        if self.raw_sliced.is_empty() {
487            return true;
488        };
489
490        // Zero length slice. It's a literal, because it's definitely not templated.
491        if source_slice.start == source_slice.end {
492            return true;
493        };
494
495        let mut is_literal = true;
496        for raw_slice in &self.raw_sliced {
497            // Reset if we find a literal and we're up to the start
498            // otherwise set false.
499            if raw_slice.source_idx <= source_slice.start {
500                is_literal = raw_slice.slice_type == "literal";
501            } else if raw_slice.source_idx >= source_slice.end {
502                break;
503            } else if raw_slice.slice_type != "literal" {
504                is_literal = false;
505            };
506        }
507        is_literal
508    }
509
510    /// Return a list of the raw slices spanning a set of indices.
511    pub(crate) fn raw_slices_spanning_source_slice(
512        &self,
513        source_slice: &Range<usize>,
514    ) -> Vec<RawFileSlice> {
515        // Special case: The source_slice is at the end of the file.
516        let last_raw_slice = self.raw_sliced.last().unwrap();
517        if source_slice.start >= last_raw_slice.source_idx + last_raw_slice.raw.len() {
518            return Vec::new();
519        }
520
521        // First find the start index
522        let mut raw_slice_idx = 0;
523        // Move the raw pointer forward to the start of this patch
524        while raw_slice_idx + 1 < self.raw_sliced.len()
525            && self.raw_sliced[raw_slice_idx + 1].source_idx <= source_slice.start
526        {
527            raw_slice_idx += 1;
528        }
529
530        // Find slice index of the end of this patch.
531        let mut slice_span = 1;
532        while raw_slice_idx + slice_span < self.raw_sliced.len()
533            && self.raw_sliced[raw_slice_idx + slice_span].source_idx < source_slice.end
534        {
535            slice_span += 1;
536        }
537
538        // Return the raw slices
539        self.raw_sliced[raw_slice_idx..(raw_slice_idx + slice_span)].to_vec()
540    }
541}
542
/// Lazily yield the byte offset of every `'\n'` in `raw_str`, in ascending
/// order.
pub fn iter_indices_of_newlines(raw_str: &str) -> impl Iterator<Item = usize> + '_ {
    // The byte 0x0A never occurs inside a multi-byte UTF-8 sequence, so
    // scanning bytes finds exactly the newline characters.
    raw_str
        .bytes()
        .enumerate()
        .filter_map(|(offset, byte)| (byte == b'\n').then_some(offset))
}
548
/// Optional sub-classification that a [`RawFileSlice`] can carry.
#[cfg_attr(feature = "stringify", derive(Serialize, Deserialize))]
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub enum RawFileSliceType {
    Comment,
    BlockEnd,
    BlockStart,
    BlockMid,
}

/// A slice referring to a raw file.
#[cfg_attr(feature = "stringify", derive(Serialize, Deserialize))]
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub struct RawFileSlice {
    /// Source string
    raw: String,
    /// Slice type, e.g. `"literal"`, `"templated"` or `"comment"`.
    pub(crate) slice_type: String,
    /// Offset from beginning of source string
    pub source_idx: usize,
    /// Optional finer-grained classification of the slice.
    slice_subtype: Option<RawFileSliceType>,
    /// Block index, incremented on start or end block tags, e.g. "if", "for"
    block_idx: usize,
}

impl RawFileSlice {
    /// Build a raw slice; a missing `block_idx` is stored as 0.
    pub fn new(
        raw: String,
        slice_type: String,
        source_idx: usize,
        slice_subtype: Option<RawFileSliceType>,
        block_idx: Option<usize>,
    ) -> Self {
        let block_idx = block_idx.unwrap_or_default();
        Self {
            raw,
            slice_type,
            source_idx,
            slice_subtype,
            block_idx,
        }
    }

    /// Return the closing index of this slice: its start offset plus the
    /// byte length of its raw text.
    fn end_source_idx(&self) -> usize {
        let byte_len = self.raw.len();
        self.source_idx + byte_len
    }

    /// Return the range this slice occupies in the source.
    pub fn source_slice(&self) -> Range<usize> {
        Range {
            start: self.source_idx,
            end: self.end_source_idx(),
        }
    }

    /// Return the raw source string for this slice.
    pub fn raw(&self) -> &str {
        self.raw.as_str()
    }

    /// Return the slice type (e.g., "literal", "templated", "comment", etc.).
    pub fn slice_type(&self) -> &str {
        self.slice_type.as_str()
    }

    /// Based on its slice_type, does it only appear in the *source*?
    ///
    /// Only the slice types that are automatically source only are
    /// recognised here.
    fn is_source_only_slice(&self) -> bool {
        // TODO: should any new logic go here?. Slice Type could probably go from String
        // To Enum
        const SOURCE_ONLY_TYPES: [&str; 4] = ["comment", "block_end", "block_start", "block_mid"];
        SOURCE_ONLY_TYPES.contains(&self.slice_type.as_str())
    }
}
624
/// Build a lookup table from character (Unicode code point) indices to byte
/// indices.
///
/// Python indexes strings by character, whereas Rust's `String::len()` and
/// string slicing work in UTF-8 bytes. The table converts from the former
/// coordinate system to the latter.
///
/// The result has `num_chars + 1` entries: entry `i` is the byte offset at
/// which the `i`-th character starts, and the final entry is the total byte
/// length (so end-of-string positions can be converted too).
pub fn char_to_byte_indices(s: &str) -> Vec<usize> {
    let char_starts = s.char_indices().map(|(byte_offset, _)| byte_offset);
    char_starts.chain(std::iter::once(s.len())).collect()
}
639
/// Look up the byte index for a character index in a table produced by
/// [`char_to_byte_indices`].
///
/// # Panics
///
/// Panics if `char_idx` is greater than or equal to `char_to_byte.len()`.
/// That means the caller passed an index which was not derived from the
/// string the table was built from.
pub fn char_idx_to_byte_idx(char_to_byte: &[usize], char_idx: usize) -> usize {
    match char_to_byte.get(char_idx) {
        Some(&byte_idx) => byte_idx,
        None => panic!(
            "char_idx_to_byte_idx: char_idx {char_idx} out of bounds for mapping of length {}",
            char_to_byte.len()
        ),
    }
}
656
657#[cfg(test)]
658mod tests {
659    use super::*;
660
661    #[test]
662    fn test_char_to_byte_indices_ascii() {
663        let indices = char_to_byte_indices("hello");
664        assert_eq!(indices, vec![0, 1, 2, 3, 4, 5]);
665    }
666
667    #[test]
668    fn test_char_to_byte_indices_multibyte() {
669        // "あいう" = 3 chars, 9 bytes (each Japanese char is 3 bytes in UTF-8)
670        let indices = char_to_byte_indices("あいう");
671        assert_eq!(indices, vec![0, 3, 6, 9]);
672    }
673
674    #[test]
675    fn test_char_to_byte_indices_mixed() {
676        // "aあb" = 3 chars; 'a'=1byte, 'あ'=3bytes, 'b'=1byte => total 5 bytes
677        let indices = char_to_byte_indices("aあb");
678        assert_eq!(indices, vec![0, 1, 4, 5]);
679    }
680
681    #[test]
682    fn test_char_to_byte_indices_accented() {
683        // "café" = 4 chars; 'c'=1, 'a'=1, 'f'=1, 'é'=2 => total 5 bytes
684        let indices = char_to_byte_indices("café");
685        assert_eq!(indices, vec![0, 1, 2, 3, 5]);
686    }
687
688    #[test]
689    fn test_char_to_byte_indices_empty() {
690        let indices = char_to_byte_indices("");
691        assert_eq!(indices, vec![0]);
692    }
693
694    #[test]
695    fn test_char_idx_to_byte_idx_conversion() {
696        let indices = char_to_byte_indices("aあb");
697        assert_eq!(char_idx_to_byte_idx(&indices, 0), 0);
698        assert_eq!(char_idx_to_byte_idx(&indices, 1), 1);
699        assert_eq!(char_idx_to_byte_idx(&indices, 2), 4);
700        assert_eq!(char_idx_to_byte_idx(&indices, 3), 5);
701    }
702
703    #[test]
704    fn test_templated_file_multibyte_consistency_check() {
705        // Regression test: TemplatedFile::new should not panic when source
706        // contains multi-byte UTF-8 characters, as long as indices are
707        // byte-based. This simulates the scenario after Python char indices
708        // have been converted to Rust byte indices.
709        //
710        // Source: "-- 日本語\nSELECT 1"
711        //   "-- 日本語" = 12 bytes (2+1+3+3+3 = '--'+' '+'日'+'本'+'語')
712        //   "\n" = 1 byte
713        //   "SELECT 1" = 8 bytes
714        //   Total: 21 bytes
715        let source = "-- 日本語\nSELECT 1".to_string();
716        assert_eq!(source.len(), 21);
717
718        let raw_sliced = vec![RawFileSlice::new(
719            source.clone(),
720            "literal".to_string(),
721            0,
722            None,
723            None,
724        )];
725        let sliced_file = vec![TemplatedFileSlice::new(
726            "literal",
727            0..source.len(),
728            0..source.len(),
729        )];
730
731        // This must not panic
732        let tf = TemplatedFile::new(
733            source.clone(),
734            "test.sql".to_string(),
735            Some(source.clone()),
736            Some(sliced_file),
737            Some(raw_sliced),
738        )
739        .unwrap();
740        assert_eq!(tf.source_str, source);
741    }
742
743    #[test]
744    fn test_templated_file_multibyte_multiple_raw_slices() {
745        // Simulates a templated file with multi-byte characters split across
746        // multiple raw slices, using byte-based indices (post-conversion).
747        //
748        // Source: "SELECT 'café'" = 14 bytes ('é' is 2 bytes)
749        // Split into: "SELECT '" (8 bytes) + "café" (5 bytes) + "'" (1 byte)
750        let source = "SELECT 'café'".to_string();
751        assert_eq!(source.len(), 14);
752
753        let raw_sliced = vec![
754            RawFileSlice::new("SELECT '".to_string(), "literal".to_string(), 0, None, None),
755            RawFileSlice::new(
756                "café".to_string(),
757                "templated".to_string(),
758                8, // byte offset
759                None,
760                None,
761            ),
762            RawFileSlice::new(
763                "'".to_string(),
764                "literal".to_string(),
765                13, // byte offset (8 + 5)
766                None,
767                None,
768            ),
769        ];
770        let sliced_file = vec![
771            TemplatedFileSlice::new("literal", 0..8, 0..8),
772            TemplatedFileSlice::new("templated", 8..13, 8..13),
773            TemplatedFileSlice::new("literal", 13..14, 13..14),
774        ];
775
776        let tf = TemplatedFile::new(
777            source.clone(),
778            "test.sql".to_string(),
779            Some(source.clone()),
780            Some(sliced_file),
781            Some(raw_sliced),
782        )
783        .unwrap();
784        assert_eq!(tf.source_str, source);
785    }
786
787    #[test]
788    #[should_panic(expected = "Consistency fail on running source length")]
789    fn test_templated_file_char_indices_cause_panic() {
790        // Demonstrates that using Python's character-based indices (without
791        // conversion) causes a panic. This is the bug scenario.
792        //
793        // Source: "aあb" = 3 chars in Python, 5 bytes in Rust
794        // If we use char indices (0, 2) instead of byte indices (0, 4) for
795        // the second slice, the consistency check fails.
796        let source = "aあb".to_string();
797
798        let raw_sliced = vec![
799            RawFileSlice::new(
800                "aあ".to_string(), // 4 bytes
801                "literal".to_string(),
802                0,
803                None,
804                None,
805            ),
806            RawFileSlice::new(
807                "b".to_string(),
808                "literal".to_string(),
809                2, // WRONG: char index from Python (should be 4 for bytes)
810                None,
811                None,
812            ),
813        ];
814        let sliced_file = vec![
815            TemplatedFileSlice::new("literal", 0..2, 0..2),
816            TemplatedFileSlice::new("literal", 2..3, 2..3),
817        ];
818
819        // This SHOULD panic because source_idx=2 != pos=4
820        let _ = TemplatedFile::new(
821            source,
822            "test.sql".to_string(),
823            Some("aあb".to_string()),
824            Some(sliced_file),
825            Some(raw_sliced),
826        );
827    }
828
829    #[test]
830    fn test_indices_of_newlines() {
831        vec![
832            ("", vec![]),
833            ("foo", vec![]),
834            ("foo\nbar", vec![3]),
835            ("\nfoo\n\nbar\nfoo\n\nbar\n", vec![0, 4, 5, 9, 13, 14, 18]),
836        ]
837        .into_iter()
838        .for_each(|(in_str, expected)| {
839            assert_eq!(
840                expected,
841                iter_indices_of_newlines(in_str).collect::<Vec<usize>>()
842            )
843        });
844    }
845
846    // const SIMPLE_SOURCE_STR: &str = "01234\n6789{{foo}}fo\nbarss";
847    // const SIMPLE_TEMPLATED_STR: &str = "01234\n6789x\nfo\nbarfss";
848
849    fn simple_sliced_file() -> Vec<TemplatedFileSlice> {
850        vec![
851            TemplatedFileSlice::new("literal", 0..10, 0..10),
852            TemplatedFileSlice::new("templated", 10..17, 10..12),
853            TemplatedFileSlice::new("literal", 17..25, 12..20),
854        ]
855    }
856
857    fn simple_raw_sliced_file() -> [RawFileSlice; 3] {
858        [
859            RawFileSlice::new("x".repeat(10), "literal".to_string(), 0, None, None),
860            RawFileSlice::new("x".repeat(7), "templated".to_string(), 10, None, None),
861            RawFileSlice::new("x".repeat(8), "literal".to_string(), 17, None, None),
862        ]
863    }
864
865    fn complex_sliced_file() -> Vec<TemplatedFileSlice> {
866        vec![
867            TemplatedFileSlice::new("literal", 0..13, 0..13),
868            TemplatedFileSlice::new("comment", 13..29, 13..13),
869            TemplatedFileSlice::new("literal", 29..44, 13..28),
870            TemplatedFileSlice::new("block_start", 44..68, 28..28),
871            TemplatedFileSlice::new("literal", 68..81, 28..41),
872            TemplatedFileSlice::new("templated", 81..86, 41..42),
873            TemplatedFileSlice::new("literal", 86..110, 42..66),
874            TemplatedFileSlice::new("templated", 68..86, 66..76),
875            TemplatedFileSlice::new("literal", 68..81, 76..89),
876            TemplatedFileSlice::new("templated", 81..86, 89..90),
877            TemplatedFileSlice::new("literal", 86..110, 90..114),
878            TemplatedFileSlice::new("templated", 68..86, 114..125),
879            TemplatedFileSlice::new("literal", 68..81, 125..138),
880            TemplatedFileSlice::new("templated", 81..86, 138..139),
881            TemplatedFileSlice::new("literal", 86..110, 139..163),
882            TemplatedFileSlice::new("templated", 110..123, 163..166),
883            TemplatedFileSlice::new("literal", 123..132, 166..175),
884            TemplatedFileSlice::new("block_end", 132..144, 175..175),
885            TemplatedFileSlice::new("literal", 144..155, 175..186),
886            TemplatedFileSlice::new("block_start", 155..179, 186..186),
887            TemplatedFileSlice::new("literal", 179..189, 186..196),
888            TemplatedFileSlice::new("templated", 189..194, 196..197),
889            TemplatedFileSlice::new("literal", 194..203, 197..206),
890            TemplatedFileSlice::new("literal", 179..189, 206..216),
891            TemplatedFileSlice::new("templated", 189..194, 216..217),
892            TemplatedFileSlice::new("literal", 194..203, 217..226),
893            TemplatedFileSlice::new("literal", 179..189, 226..236),
894            TemplatedFileSlice::new("templated", 189..194, 236..237),
895            TemplatedFileSlice::new("literal", 194..203, 237..246),
896            TemplatedFileSlice::new("block_end", 203..215, 246..246),
897            TemplatedFileSlice::new("literal", 215..230, 246..261),
898        ]
899    }
900
901    fn complex_raw_sliced_file() -> Vec<RawFileSlice> {
902        vec![
903            RawFileSlice::new(
904                "x".repeat(13).to_string(),
905                "literal".to_string(),
906                0,
907                None,
908                None,
909            ),
910            RawFileSlice::new(
911                "x".repeat(16).to_string(),
912                "comment".to_string(),
913                13,
914                None,
915                None,
916            ),
917            RawFileSlice::new(
918                "x".repeat(15).to_string(),
919                "literal".to_string(),
920                29,
921                None,
922                None,
923            ),
924            RawFileSlice::new(
925                "x".repeat(24).to_string(),
926                "block_start".to_string(),
927                44,
928                None,
929                None,
930            ),
931            RawFileSlice::new(
932                "x".repeat(13).to_string(),
933                "literal".to_string(),
934                68,
935                None,
936                None,
937            ),
938            RawFileSlice::new(
939                "x".repeat(5).to_string(),
940                "templated".to_string(),
941                81,
942                None,
943                None,
944            ),
945            RawFileSlice::new(
946                "x".repeat(24).to_string(),
947                "literal".to_string(),
948                86,
949                None,
950                None,
951            ),
952            RawFileSlice::new(
953                "x".repeat(13).to_string(),
954                "templated".to_string(),
955                110,
956                None,
957                None,
958            ),
959            RawFileSlice::new(
960                "x".repeat(9).to_string(),
961                "literal".to_string(),
962                123,
963                None,
964                None,
965            ),
966            RawFileSlice::new(
967                "x".repeat(12).to_string(),
968                "block_end".to_string(),
969                132,
970                None,
971                None,
972            ),
973            RawFileSlice::new(
974                "x".repeat(11).to_string(),
975                "literal".to_string(),
976                144,
977                None,
978                None,
979            ),
980            RawFileSlice::new(
981                "x".repeat(24).to_string(),
982                "block_start".to_string(),
983                155,
984                None,
985                None,
986            ),
987            RawFileSlice::new(
988                "x".repeat(10).to_string(),
989                "literal".to_string(),
990                179,
991                None,
992                None,
993            ),
994            RawFileSlice::new(
995                "x".repeat(5).to_string(),
996                "templated".to_string(),
997                189,
998                None,
999                None,
1000            ),
1001            RawFileSlice::new(
1002                "x".repeat(9).to_string(),
1003                "literal".to_string(),
1004                194,
1005                None,
1006                None,
1007            ),
1008            RawFileSlice::new(
1009                "x".repeat(12).to_string(),
1010                "block_end".to_string(),
1011                203,
1012                None,
1013                None,
1014            ),
1015            RawFileSlice::new(
1016                "x".repeat(15).to_string(),
1017                "literal".to_string(),
1018                215,
1019                None,
1020                None,
1021            ),
1022        ]
1023    }
1024
    /// Bundle of the arguments the tests in this module hand to
    /// `TemplatedFile::new`, so fixtures can be built once and then tweaked
    /// with struct-update syntax.
    struct FileKwargs {
        /// File name; passed as the `name` argument.
        f_name: String,
        /// Raw (untemplated) source text.
        source_str: String,
        /// Rendered text, or `None` to pass no templated string.
        templated_str: Option<String>,
        /// Mapping between source and templated ranges; the tests wrap it in
        /// `Some` before passing it on.
        sliced_file: Vec<TemplatedFileSlice>,
        /// Slices of the raw source; the tests wrap it in `Some` before
        /// passing it on.
        raw_sliced_file: Vec<RawFileSlice>,
    }
1032
1033    fn simple_file_kwargs() -> FileKwargs {
1034        FileKwargs {
1035            f_name: "test.sql".to_string(),
1036            source_str: "01234\n6789{{foo}}fo\nbarss".to_string(),
1037            templated_str: Some("01234\n6789x\nfo\nbarss".to_string()),
1038            sliced_file: simple_sliced_file().to_vec(),
1039            raw_sliced_file: simple_raw_sliced_file().to_vec(),
1040        }
1041    }
1042
1043    fn complex_file_kwargs() -> FileKwargs {
1044        FileKwargs {
1045            f_name: "test.sql".to_string(),
1046            source_str: complex_raw_sliced_file()
1047                .iter()
1048                .fold(String::new(), |acc, x| acc + &x.raw),
1049            templated_str: None,
1050            sliced_file: complex_sliced_file().to_vec(),
1051            raw_sliced_file: complex_raw_sliced_file().to_vec(),
1052        }
1053    }
1054
1055    #[test]
1056    /// Test TemplatedFile.get_line_pos_of_char_pos.
1057    fn test_templated_file_get_line_pos_of_char_pos() {
1058        let tests = [
1059            (simple_file_kwargs(), 0, 1, 1),
1060            (simple_file_kwargs(), 20, 3, 1),
1061            (simple_file_kwargs(), 24, 3, 5),
1062        ];
1063
1064        for test in tests {
1065            let kwargs = test.0;
1066
1067            let tf = TemplatedFile::new(
1068                kwargs.source_str,
1069                kwargs.f_name,
1070                kwargs.templated_str,
1071                Some(kwargs.sliced_file),
1072                Some(kwargs.raw_sliced_file),
1073            )
1074            .unwrap();
1075
1076            let (res_line_no, res_line_pos) = tf.get_line_pos_of_char_pos(test.1, true);
1077
1078            assert_eq!(res_line_no, test.2);
1079            assert_eq!(res_line_pos, test.3);
1080        }
1081    }
1082
1083    #[test]
1084    fn test_templated_file_find_slice_indices_of_templated_pos() {
1085        let tests = vec![
1086            // "templated_position,inclusive,file_slices,sliced_idx_start,sliced_idx_stop",
1087            // TODO Fix these
1088            // (100, true, complex_file_kwargs(), 10, 11),
1089            // (13, true, complex_file_kwargs(), 0, 3),
1090            // (28, true, complex_file_kwargs(), 2, 5),
1091            // # Check end slicing.
1092            (12, true, simple_file_kwargs(), 1, 3),
1093            (20, true, simple_file_kwargs(), 2, 3),
1094            // Check inclusivity
1095            // (13, false, complex_file_kwargs(), 0, 1),
1096        ];
1097
1098        for test in tests {
1099            let args = test.2;
1100
1101            let file = TemplatedFile::new(
1102                args.source_str,
1103                args.f_name,
1104                args.templated_str,
1105                Some(args.sliced_file),
1106                Some(args.raw_sliced_file),
1107            )
1108            .unwrap();
1109
1110            let (res_start, res_stop) = file
1111                .find_slice_indices_of_templated_pos(test.0, None, Some(test.1))
1112                .unwrap();
1113
1114            assert_eq!(res_start, test.3);
1115            assert_eq!(res_stop, test.4);
1116        }
1117    }
1118
1119    #[test]
1120    /// Test TemplatedFile.templated_slice_to_source_slice
1121    fn test_templated_file_templated_slice_to_source_slice() {
1122        let test_cases = vec![
1123            // Simple example
1124            (
1125                5..10,
1126                5..10,
1127                true,
1128                FileKwargs {
1129                    sliced_file: vec![TemplatedFileSlice::new("literal", 0..20, 0..20)],
1130                    raw_sliced_file: vec![RawFileSlice::new(
1131                        "x".repeat(20),
1132                        "literal".to_string(),
1133                        0,
1134                        None,
1135                        None,
1136                    )],
1137                    source_str: "x".repeat(20),
1138                    f_name: "foo.sql".to_string(),
1139                    templated_str: None,
1140                },
1141            ),
1142            // Trimming the end of a literal (with things that follow).
1143            (10..13, 10..13, true, complex_file_kwargs()),
1144            // // Unrealistic, but should still work
1145            (
1146                5..10,
1147                55..60,
1148                true,
1149                FileKwargs {
1150                    sliced_file: vec![TemplatedFileSlice::new("literal", 50..70, 0..20)],
1151                    raw_sliced_file: vec![
1152                        RawFileSlice::new("x".repeat(50), "literal".to_string(), 0, None, None),
1153                        RawFileSlice::new("x".repeat(20), "literal".to_string(), 50, None, None),
1154                    ],
1155                    source_str: "x".repeat(70),
1156                    f_name: "foo.sql".to_string(),
1157                    templated_str: None,
1158                },
1159            ),
1160            // // Spanning a template
1161            (5..15, 5..20, false, simple_file_kwargs()),
1162            // // Handling templated
1163            (
1164                5..15,
1165                0..25,
1166                false,
1167                FileKwargs {
1168                    sliced_file: simple_file_kwargs()
1169                        .sliced_file
1170                        .iter()
1171                        .map(|slc| {
1172                            TemplatedFileSlice::new(
1173                                "templated",
1174                                slc.source_slice.clone(),
1175                                slc.templated_slice.clone(),
1176                            )
1177                        })
1178                        .collect(),
1179                    raw_sliced_file: simple_file_kwargs()
1180                        .raw_sliced_file
1181                        .iter()
1182                        .map(|slc| {
1183                            RawFileSlice::new(
1184                                slc.raw.to_string(),
1185                                "templated".to_string(),
1186                                slc.source_idx,
1187                                None,
1188                                None,
1189                            )
1190                        })
1191                        .collect(),
1192                    ..simple_file_kwargs()
1193                },
1194            ),
1195            // // Handling single length slices
1196            (10..10, 10..10, true, simple_file_kwargs()),
1197            (12..12, 17..17, true, simple_file_kwargs()),
1198            // // Dealing with single length elements
1199            (
1200                20..20,
1201                25..25,
1202                true,
1203                FileKwargs {
1204                    sliced_file: simple_file_kwargs()
1205                        .sliced_file
1206                        .into_iter()
1207                        .chain(vec![TemplatedFileSlice::new("comment", 25..35, 20..20)])
1208                        .collect(),
1209                    raw_sliced_file: simple_file_kwargs()
1210                        .raw_sliced_file
1211                        .into_iter()
1212                        .chain(vec![RawFileSlice::new(
1213                            "x".repeat(10),
1214                            "comment".to_string(),
1215                            25,
1216                            None,
1217                            None,
1218                        )])
1219                        .collect(),
1220                    source_str: simple_file_kwargs().source_str.to_string() + &"x".repeat(10),
1221                    ..simple_file_kwargs()
1222                },
1223            ),
1224            // // Just more test coverage
1225            (43..43, 87..87, true, complex_file_kwargs()),
1226            (13..13, 13..13, true, complex_file_kwargs()),
1227            (186..186, 155..155, true, complex_file_kwargs()),
1228            // Backward slicing.
1229            (
1230                100..130,
1231                // NB This actually would reference the wrong way around if we
1232                // just take the points. Here we should handle it gracefully.
1233                68..110,
1234                false,
1235                complex_file_kwargs(),
1236            ),
1237        ];
1238
1239        for (in_slice, out_slice, is_literal, tf_kwargs) in test_cases {
1240            let file = TemplatedFile::new(
1241                tf_kwargs.source_str,
1242                tf_kwargs.f_name,
1243                tf_kwargs.templated_str,
1244                Some(tf_kwargs.sliced_file),
1245                Some(tf_kwargs.raw_sliced_file),
1246            )
1247            .unwrap();
1248
1249            let source_slice = file.templated_slice_to_source_slice(in_slice).unwrap();
1250            let literal_test = file.is_source_slice_literal(&source_slice);
1251
1252            assert_eq!((is_literal, source_slice), (literal_test, out_slice));
1253        }
1254    }
1255
1256    #[test]
1257    /// Test TemplatedFile.source_only_slices
1258    fn test_templated_file_source_only_slices() {
1259        let test_cases = vec![
1260            // Comment example
1261            (
1262                TemplatedFile::new(
1263                    format!("{}{}{}", "a".repeat(10), "{# b #}", "a".repeat(10)),
1264                    "test".to_string(),
1265                    None,
1266                    Some(vec![
1267                        TemplatedFileSlice::new("literal", 0..10, 0..10),
1268                        TemplatedFileSlice::new("templated", 10..17, 10..10),
1269                        TemplatedFileSlice::new("literal", 17..27, 10..20),
1270                    ]),
1271                    Some(vec![
1272                        RawFileSlice::new(
1273                            "a".repeat(10).to_string(),
1274                            "literal".to_string(),
1275                            0,
1276                            None,
1277                            None,
1278                        ),
1279                        RawFileSlice::new(
1280                            "{# b #}".to_string(),
1281                            "comment".to_string(),
1282                            10,
1283                            None,
1284                            None,
1285                        ),
1286                        RawFileSlice::new(
1287                            "a".repeat(10).to_string(),
1288                            "literal".to_string(),
1289                            17,
1290                            None,
1291                            None,
1292                        ),
1293                    ]),
1294                )
1295                .unwrap(),
1296                vec![RawFileSlice::new(
1297                    "{# b #}".to_string(),
1298                    "comment".to_string(),
1299                    10,
1300                    None,
1301                    None,
1302                )],
1303            ),
1304            // Template tags aren't source only.
1305            (
1306                TemplatedFile::new(
1307                    "aaa{{ b }}aaa".to_string(),
1308                    "test".to_string(),
1309                    None,
1310                    Some(vec![
1311                        TemplatedFileSlice::new("literal", 0..3, 0..3),
1312                        TemplatedFileSlice::new("templated", 3..10, 3..6),
1313                        TemplatedFileSlice::new("literal", 10..13, 6..9),
1314                    ]),
1315                    Some(vec![
1316                        RawFileSlice::new("aaa".to_string(), "literal".to_string(), 0, None, None),
1317                        RawFileSlice::new(
1318                            "{{ b }}".to_string(),
1319                            "templated".to_string(),
1320                            3,
1321                            None,
1322                            None,
1323                        ),
1324                        RawFileSlice::new("aaa".to_string(), "literal".to_string(), 10, None, None),
1325                    ]),
1326                )
1327                .unwrap(),
1328                vec![],
1329            ),
1330        ];
1331
1332        for (file, expected) in test_cases {
1333            assert_eq!(file.source_only_slices(), expected, "Failed for {:?}", file);
1334        }
1335    }
1336}