datafusion_common/
spans.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::cmp::{self, Ordering};
19use std::fmt;
20use std::hash::{Hash, Hasher};
21
/// A position in the original SQL query text, identified by a line number
/// and a column number.
///
/// The derived ordering compares `line` first and `column` second.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Location {
    /// 1-based line number.
    ///
    /// Note: the value 0 is used for empty spans.
    pub line: u64,
    /// 1-based column number within the line.
    ///
    /// Note: the value 0 is used for empty spans.
    pub column: u64,
}
35
36impl fmt::Debug for Location {
37    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
38        write!(f, "Location({},{})", self.line, self.column)
39    }
40}
41
// Conversion from the parser's location type: the line and column are copied
// over unchanged. Only available when the `sql` feature is enabled.
#[cfg(feature = "sql")]
impl From<sqlparser::tokenizer::Location> for Location {
    fn from(value: sqlparser::tokenizer::Location) -> Self {
        let (line, column) = (value.line, value.column);
        Location { line, column }
    }
}
51
52/// Represents an interval of characters in the original SQL query.
53#[derive(Eq, PartialEq, Hash, Clone, PartialOrd, Ord, Copy)]
54pub struct Span {
55    pub start: Location,
56    pub end: Location,
57}
58
59impl fmt::Debug for Span {
60    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
61        write!(f, "Span({:?}..{:?})", self.start, self.end)
62    }
63}
64
65impl Span {
66    /// Creates a new [`Span`] from a start and an end [`Location`].
67    pub fn new(start: Location, end: Location) -> Self {
68        Self { start, end }
69    }
70
71    /// Convert a [`Span`](sqlparser::tokenizer::Span) from the parser, into a
72    /// DataFusion [`Span`]. If the input span is empty (line 0 column 0, to
73    /// line 0 column 0), then [`None`] is returned.
74    #[cfg(feature = "sql")]
75    pub fn try_from_sqlparser_span(span: sqlparser::tokenizer::Span) -> Option<Span> {
76        if span == sqlparser::tokenizer::Span::empty() {
77            None
78        } else {
79            Some(Span {
80                start: span.start.into(),
81                end: span.end.into(),
82            })
83        }
84    }
85
86    /// Returns the smallest Span that contains both `self` and `other`
87    ///
88    /// # Examples
89    /// ```
90    /// # use sqlparser::tokenizer::{Span, Location};
91    /// // line 1, column1 -> line 2, column 5
92    /// let span1 = Span::new(Location::new(1, 1), Location::new(2, 5));
93    /// // line 2, column 3 -> line 3, column 7
94    /// let span2 = Span::new(Location::new(2, 3), Location::new(3, 7));
95    /// // Union of the two is the min/max of the two spans
96    /// // line 1, column 1 -> line 3, column 7
97    /// let union = span1.union(&span2);
98    /// assert_eq!(union, Span::new(Location::new(1, 1), Location::new(3, 7)));
99    /// ```
100    pub fn union(&self, other: &Span) -> Span {
101        Span {
102            start: cmp::min(self.start, other.start),
103            end: cmp::max(self.end, other.end),
104        }
105    }
106
107    /// Same as [Span::union] for `Option<Span>`.
108    ///
109    /// If `other` is `None`, `self` is returned.
110    pub fn union_opt(&self, other: &Option<Span>) -> Span {
111        match other {
112            Some(other) => self.union(other),
113            None => *self,
114        }
115    }
116
117    /// Return the [Span::union] of all spans in the iterator.
118    ///
119    /// If the iterator is empty, [`None`] is returned.
120    ///
121    /// # Example
122    /// ```
123    /// # use sqlparser::tokenizer::{Span, Location};
124    /// let spans = vec![
125    ///     Span::new(Location::new(1, 1), Location::new(2, 5)),
126    ///     Span::new(Location::new(2, 3), Location::new(3, 7)),
127    ///     Span::new(Location::new(3, 1), Location::new(4, 2)),
128    /// ];
129    /// // line 1, column 1 -> line 4, column 2
130    /// assert_eq!(
131    ///   Span::union_iter(spans),
132    ///   Span::new(Location::new(1, 1), Location::new(4, 2))
133    /// );
134    pub fn union_iter<I: IntoIterator<Item = Span>>(iter: I) -> Option<Span> {
135        iter.into_iter().reduce(|acc, item| acc.union(&item))
136    }
137}
138
139/// A collection of [`Span`], meant to be used as a field of entities whose
140/// location in the original SQL query is desired to be tracked. Sometimes an
141/// entity can have multiple spans. e.g. if you want to track the position of
142/// the column a that comes from SELECT 1 AS a UNION ALL SELECT 2 AS a you'll
143/// need two spans.
144#[derive(Debug, Clone)]
145// Store the first [`Span`] on the stack because that is by far the most common
146// case. More will spill onto the heap.
147pub struct Spans(pub Vec<Span>);
148
149impl Spans {
150    /// Creates a new empty [`Spans`] with no [`Span`].
151    pub fn new() -> Self {
152        Spans(Vec::new())
153    }
154
155    /// Returns the first [`Span`], if any. This is useful when you know that
156    /// there's gonna be only one [`Span`] at most.
157    pub fn first(&self) -> Option<Span> {
158        self.0.first().copied()
159    }
160
161    /// Returns a slice of the [`Span`]s.
162    pub fn get_spans(&self) -> &[Span] {
163        &self.0
164    }
165
166    /// Adds a [`Span`] to the collection.
167    pub fn add_span(&mut self, span: Span) {
168        self.0.push(span);
169    }
170
171    /// Iterates over the [`Span`]s.
172    pub fn iter(&self) -> impl Iterator<Item = &Span> {
173        self.0.iter()
174    }
175}
176
177impl Default for Spans {
178    fn default() -> Self {
179        Self::new()
180    }
181}
182
183// Since [`Spans`] will be used as a field in other structs, we don't want it to
184// interfere with the equality and ordering of the entities themselves, since
185// this is just diagnostics information for the end user.
186impl PartialEq for Spans {
187    fn eq(&self, _other: &Self) -> bool {
188        true
189    }
190}
191
192// Since [`Spans`] will be used as a field in other structs, we don't want it to
193// interfere with the equality and ordering of the entities themselves, since
194// this is just diagnostics information for the end user.
195impl Eq for Spans {}
196
197// Since [`Spans`] will be used as a field in other structs, we don't want it to
198// interfere with the equality and ordering of the entities themselves, since
199// this is just diagnostics information for the end user.
200impl PartialOrd for Spans {
201    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
202        Some(self.cmp(other))
203    }
204}
205
206// Since [`Spans`] will be used as a field in other structs, we don't want it to
207// interfere with the equality and ordering of the entities themselves, since
208// this is just diagnostics information for the end user.
209impl Ord for Spans {
210    fn cmp(&self, _other: &Self) -> Ordering {
211        Ordering::Equal
212    }
213}
214
215// Since [`Spans`] will be used as a field in other structs, we don't want it to
216// interfere with the equality and ordering of the entities themselves, since
217// this is just diagnostics information for the end user.
218impl Hash for Spans {
219    fn hash<H: Hasher>(&self, _state: &mut H) {}
220}