datafusion_common/spans.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::cmp::{self, Ordering};
19use std::fmt;
20use std::hash::{Hash, Hasher};
21
/// A position in the original SQL query text, identified by a line number
/// and a column number.
///
/// The derived ordering compares `line` first and then `column`.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Location {
    /// One-based line number.
    ///
    /// Note: the value 0 is used as the line of empty spans.
    pub line: u64,
    /// One-based column number within the line.
    ///
    /// Note: the value 0 is used as the column of empty spans.
    pub column: u64,
}
35
36impl fmt::Debug for Location {
37 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
38 write!(f, "Location({},{})", self.line, self.column)
39 }
40}
41
// Only available when the `sql` feature (and with it `sqlparser`) is enabled.
#[cfg(feature = "sql")]
impl From<sqlparser::tokenizer::Location> for Location {
    /// Copies the line and column of a parser location into a DataFusion
    /// [`Location`].
    fn from(value: sqlparser::tokenizer::Location) -> Self {
        let (line, column) = (value.line, value.column);
        Location { line, column }
    }
}
51
52/// Represents an interval of characters in the original SQL query.
53#[derive(Eq, PartialEq, Hash, Clone, PartialOrd, Ord, Copy)]
54pub struct Span {
55 pub start: Location,
56 pub end: Location,
57}
58
59impl fmt::Debug for Span {
60 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
61 write!(f, "Span({:?}..{:?})", self.start, self.end)
62 }
63}
64
65impl Span {
66 /// Creates a new [`Span`] from a start and an end [`Location`].
67 pub fn new(start: Location, end: Location) -> Self {
68 Self { start, end }
69 }
70
71 /// Convert a [`Span`](sqlparser::tokenizer::Span) from the parser, into a
72 /// DataFusion [`Span`]. If the input span is empty (line 0 column 0, to
73 /// line 0 column 0), then [`None`] is returned.
74 #[cfg(feature = "sql")]
75 pub fn try_from_sqlparser_span(span: sqlparser::tokenizer::Span) -> Option<Span> {
76 if span == sqlparser::tokenizer::Span::empty() {
77 None
78 } else {
79 Some(Span {
80 start: span.start.into(),
81 end: span.end.into(),
82 })
83 }
84 }
85
86 /// Returns the smallest Span that contains both `self` and `other`
87 ///
88 /// # Examples
89 /// ```
90 /// # use sqlparser::tokenizer::{Span, Location};
91 /// // line 1, column1 -> line 2, column 5
92 /// let span1 = Span::new(Location::new(1, 1), Location::new(2, 5));
93 /// // line 2, column 3 -> line 3, column 7
94 /// let span2 = Span::new(Location::new(2, 3), Location::new(3, 7));
95 /// // Union of the two is the min/max of the two spans
96 /// // line 1, column 1 -> line 3, column 7
97 /// let union = span1.union(&span2);
98 /// assert_eq!(union, Span::new(Location::new(1, 1), Location::new(3, 7)));
99 /// ```
100 pub fn union(&self, other: &Span) -> Span {
101 Span {
102 start: cmp::min(self.start, other.start),
103 end: cmp::max(self.end, other.end),
104 }
105 }
106
107 /// Same as [Span::union] for `Option<Span>`.
108 ///
109 /// If `other` is `None`, `self` is returned.
110 pub fn union_opt(&self, other: &Option<Span>) -> Span {
111 match other {
112 Some(other) => self.union(other),
113 None => *self,
114 }
115 }
116
117 /// Return the [Span::union] of all spans in the iterator.
118 ///
119 /// If the iterator is empty, [`None`] is returned.
120 ///
121 /// # Example
122 /// ```
123 /// # use sqlparser::tokenizer::{Span, Location};
124 /// let spans = vec![
125 /// Span::new(Location::new(1, 1), Location::new(2, 5)),
126 /// Span::new(Location::new(2, 3), Location::new(3, 7)),
127 /// Span::new(Location::new(3, 1), Location::new(4, 2)),
128 /// ];
129 /// // line 1, column 1 -> line 4, column 2
130 /// assert_eq!(
131 /// Span::union_iter(spans),
132 /// Span::new(Location::new(1, 1), Location::new(4, 2))
133 /// );
134 pub fn union_iter<I: IntoIterator<Item = Span>>(iter: I) -> Option<Span> {
135 iter.into_iter().reduce(|acc, item| acc.union(&item))
136 }
137}
138
139/// A collection of [`Span`], meant to be used as a field of entities whose
140/// location in the original SQL query is desired to be tracked. Sometimes an
141/// entity can have multiple spans. e.g. if you want to track the position of
142/// the column a that comes from SELECT 1 AS a UNION ALL SELECT 2 AS a you'll
143/// need two spans.
144#[derive(Debug, Clone)]
145// Store the first [`Span`] on the stack because that is by far the most common
146// case. More will spill onto the heap.
147pub struct Spans(pub Vec<Span>);
148
149impl Spans {
150 /// Creates a new empty [`Spans`] with no [`Span`].
151 pub fn new() -> Self {
152 Spans(Vec::new())
153 }
154
155 /// Returns the first [`Span`], if any. This is useful when you know that
156 /// there's gonna be only one [`Span`] at most.
157 pub fn first(&self) -> Option<Span> {
158 self.0.first().copied()
159 }
160
161 /// Returns a slice of the [`Span`]s.
162 pub fn get_spans(&self) -> &[Span] {
163 &self.0
164 }
165
166 /// Adds a [`Span`] to the collection.
167 pub fn add_span(&mut self, span: Span) {
168 self.0.push(span);
169 }
170
171 /// Iterates over the [`Span`]s.
172 pub fn iter(&self) -> impl Iterator<Item = &Span> {
173 self.0.iter()
174 }
175}
176
177impl Default for Spans {
178 fn default() -> Self {
179 Self::new()
180 }
181}
182
183// Since [`Spans`] will be used as a field in other structs, we don't want it to
184// interfere with the equality and ordering of the entities themselves, since
185// this is just diagnostics information for the end user.
186impl PartialEq for Spans {
187 fn eq(&self, _other: &Self) -> bool {
188 true
189 }
190}
191
192// Since [`Spans`] will be used as a field in other structs, we don't want it to
193// interfere with the equality and ordering of the entities themselves, since
194// this is just diagnostics information for the end user.
195impl Eq for Spans {}
196
197// Since [`Spans`] will be used as a field in other structs, we don't want it to
198// interfere with the equality and ordering of the entities themselves, since
199// this is just diagnostics information for the end user.
200impl PartialOrd for Spans {
201 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
202 Some(self.cmp(other))
203 }
204}
205
206// Since [`Spans`] will be used as a field in other structs, we don't want it to
207// interfere with the equality and ordering of the entities themselves, since
208// this is just diagnostics information for the end user.
209impl Ord for Spans {
210 fn cmp(&self, _other: &Self) -> Ordering {
211 Ordering::Equal
212 }
213}
214
215// Since [`Spans`] will be used as a field in other structs, we don't want it to
216// interfere with the equality and ordering of the entities themselves, since
217// this is just diagnostics information for the end user.
218impl Hash for Spans {
219 fn hash<H: Hasher>(&self, _state: &mut H) {}
220}