shuck_indexer/lib.rs
1#![warn(missing_docs)]
2#![cfg_attr(not(test), warn(clippy::unwrap_used))]
3
4//! Positional and structural indexes over parsed shell scripts.
5//!
6//! The indexer complements `shuck-parser` by building compact lookup tables for
7//! source lines, comments, syntactic regions, heredoc bodies, and physical line
8//! continuations. It is intended to be built once from parser output and then
9//! shared by semantic analysis, lint rules, suppressions, formatters, and report
10//! rendering.
11//!
12//! All positions are byte offsets represented with `shuck_ast::TextSize` and
13//! `shuck_ast::TextRange`. The crate does not build a character index: callers
14//! that need display columns should combine these byte offsets with the original
15//! source text at the UI boundary.
16//!
17//! [`Indexer`] is the preferred construction path when parser output is
18//! available. The lower-level indexes are also exported for integrations that
19//! only need line mapping or that already have an AST-shaped source of comments
20//! or regions.
21mod comment_index;
22mod line_index;
23mod region_index;
24
25/// Comment lookup types derived from parser output.
26pub use comment_index::{CommentIndex, IndexedComment};
27/// Line-based offset lookup utilities.
28pub use line_index::{LineEndingStyle, LineIndex};
29/// Structural region indexes over parsed shell source.
30pub use region_index::{IndexedHeredoc, RegionIndex, RegionKind};
31
32use line_index::{RawContinuationCandidate, RawContinuationMode};
33use shuck_ast::{File, TextSize};
34use shuck_parser::parser::ParseResult;
35
/// Switches for index families that only some consumers need.
///
/// The default value builds the indexes used by linting and semantic analysis.
/// Turning on source-layout indexes additionally retains formatter-oriented
/// lookup tables such as raw continuation backslash offsets and heredoc
/// closing-marker ranges.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct IndexerOptions {
    source_layout_indexes: bool,
}

impl IndexerOptions {
    /// Create options equal to [`IndexerOptions::default`].
    pub fn new() -> Self {
        Default::default()
    }

    /// Choose whether formatter-oriented source-layout indexes are retained.
    ///
    /// With `enabled` set, [`LineIndex::raw_continuation_line_starts`],
    /// [`LineIndex::raw_continuation_backslashes`], and
    /// [`RegionIndex::heredoc_closing_marker_range`] keep their lookup data.
    /// By default those retained indexes stay disabled, while semantic
    /// continuation lines for [`Indexer::continuation_line_starts`] are still
    /// computed.
    pub fn with_source_layout_indexes(self, enabled: bool) -> Self {
        let mut options = self;
        options.source_layout_indexes = enabled;
        options
    }

    /// Report whether formatter-oriented source-layout indexes are enabled.
    pub fn source_layout_indexes(self) -> bool {
        self.source_layout_indexes
    }
}
69
70/// Pre-computed positional and structural index over a parsed shell script.
71///
72/// `Indexer` owns the line, comment, and syntactic-region indexes for one source
73/// file. It also filters raw backslash-newline candidates into the continuation
74/// lines that matter to shell analysis: continuations in comments, quoted text,
75/// and heredoc bodies are excluded.
76///
77/// Build one `Indexer` for a parse result and pass references to downstream
78/// analysis code. Query methods borrow precomputed data and do not walk the AST,
79/// rescan the full source, or allocate.
80#[derive(Debug, Clone, PartialEq, Eq)]
81pub struct Indexer {
82 line_index: LineIndex,
83 comment_index: CommentIndex,
84 region_index: RegionIndex,
85 continuation_lines: Vec<TextSize>,
86}
87
88impl Indexer {
89 /// Build all indexes from parser output and the original source text.
90 ///
91 /// `source` must be the exact text used to produce `output`; ranges in the
92 /// parse result are interpreted as byte offsets into that string. Mismatched
93 /// source text can make line and region queries meaningless, even though the
94 /// constructor defensively avoids panicking on malformed comment ranges.
95 pub fn new(source: &str, output: &ParseResult) -> Self {
96 Self::for_file(source, &output.file)
97 }
98
99 /// Build indexes from parser output using explicit options.
100 ///
101 /// `source` must be the exact text used to produce `output`; ranges in the
102 /// parse result are interpreted as byte offsets into that string.
103 pub fn new_with_options(source: &str, output: &ParseResult, options: IndexerOptions) -> Self {
104 Self::for_file_with_options(source, &output.file, options)
105 }
106
107 /// Build all indexes from an already parsed file and the original source text.
108 ///
109 /// `source` must be the exact text used to produce `file`; ranges in the
110 /// AST are interpreted as byte offsets into that string.
111 pub fn for_file(source: &str, file: &File) -> Self {
112 Self::for_file_with_options(source, file, IndexerOptions::default())
113 }
114
115 /// Build indexes from an already parsed file using explicit options.
116 ///
117 /// `source` must be the exact text used to produce `file`; ranges in the
118 /// AST are interpreted as byte offsets into that string.
119 pub fn for_file_with_options(source: &str, file: &File, options: IndexerOptions) -> Self {
120 let raw_mode = if options.source_layout_indexes() {
121 RawContinuationMode::StoreAndReturn
122 } else {
123 RawContinuationMode::ReturnOnly
124 };
125 let (line_index, raw_continuations) = LineIndex::build(source, raw_mode);
126 let comment_index = CommentIndex::new(source, &line_index, file);
127 let region_index = RegionIndex::new_with_source_layout_indexes(
128 source,
129 file,
130 options.source_layout_indexes(),
131 );
132 let continuation_lines =
133 collect_continuation_lines(&raw_continuations, &comment_index, ®ion_index);
134
135 Self {
136 line_index,
137 comment_index,
138 region_index,
139 continuation_lines,
140 }
141 }
142
143 /// Return the line index for this source text.
144 ///
145 /// This is useful for converting diagnostic byte offsets to 1-based line
146 /// numbers or for extracting line-local snippets from the original source.
147 pub fn line_index(&self) -> &LineIndex {
148 &self.line_index
149 }
150
151 /// Return the comment index extracted from parser-owned comments.
152 ///
153 /// Comments are exposed in source order and include parser-recognized
154 /// comments inside nested shell constructs.
155 pub fn comment_index(&self) -> &CommentIndex {
156 &self.comment_index
157 }
158
159 /// Return the syntactic region index for quoted, heredoc, and related spans.
160 ///
161 /// Region lookups are intended for rules and formatters that need to avoid
162 /// interpreting bytes the same way in every syntactic context.
163 pub fn region_index(&self) -> &RegionIndex {
164 &self.region_index
165 }
166
167 /// Return byte offsets for the start of each semantic continuation line.
168 ///
169 /// Each offset points at the first byte of a physical line that continues
170 /// the previous one because that previous line ended with an active
171 /// backslash-newline. Continuations inside comments, quotes, and heredocs
172 /// are filtered out.
173 pub fn continuation_line_starts(&self) -> &[TextSize] {
174 &self.continuation_lines
175 }
176
177 /// Return whether `offset` is on a semantic continuation line.
178 ///
179 /// The query first maps `offset` to its containing 1-based line, then checks
180 /// whether that line starts at one of [`Self::continuation_line_starts`].
181 /// Offsets past the final byte of the source are treated according to the
182 /// last indexed line.
183 pub fn is_continuation(&self, offset: TextSize) -> bool {
184 let line = self.line_index.line_number(offset);
185 let Some(line_start) = self.line_index.line_start(line) else {
186 return false;
187 };
188
189 contains_offset(&self.continuation_lines, line_start)
190 }
191}
192
193fn collect_continuation_lines(
194 raw_continuations: &[RawContinuationCandidate],
195 comment_index: &CommentIndex,
196 region_index: &RegionIndex,
197) -> Vec<TextSize> {
198 let mut continuation_lines = Vec::new();
199
200 for continuation in raw_continuations {
201 if comment_index.is_comment(continuation.backslash)
202 || region_index.is_heredoc(continuation.backslash)
203 || region_index.is_quoted(continuation.backslash)
204 {
205 continue;
206 }
207
208 continuation_lines.push(continuation.line_start);
209 }
210
211 continuation_lines
212}
213
214fn contains_offset(offsets: &[TextSize], offset: TextSize) -> bool {
215 offsets.binary_search(&offset).is_ok()
216}
217
#[cfg(test)]
mod tests {
    use super::*;
    use shuck_parser::parser::Parser;

    /// Parse `source` and build an [`Indexer`] with default options.
    fn index(source: &str) -> Indexer {
        let parsed = Parser::new(source).parse().unwrap();
        Indexer::new(source, &parsed)
    }

    /// Byte offset of the first occurrence of `needle` in `source`.
    fn offset_of(source: &str, needle: &str) -> TextSize {
        TextSize::new(source.find(needle).unwrap() as u32)
    }

    #[test]
    fn detects_continuation_lines_without_allocating_source_copies() {
        // One active continuation, then a backslash-newline inside double quotes.
        let source = "echo foo \\\n bar\necho \"foo\\\nbar\"\n";
        let indexer = index(source);

        assert_eq!(indexer.continuation_line_starts().len(), 1);
        assert!(indexer.is_continuation(TextSize::new(11)));
        assert!(!indexer.is_continuation(TextSize::new(28)));

        // Default options do not retain the raw backslash offsets.
        let raw_backslashes = indexer.line_index().raw_continuation_backslashes();
        assert!(raw_backslashes.is_empty());
        assert!(indexer.region_index().heredocs().is_empty());
    }

    #[test]
    fn retains_source_layout_indexes_only_when_requested() {
        let source = "echo foo \\\n bar\ncat <<EOF\nbody\nEOF\n";
        let parsed = Parser::new(source).parse().unwrap();
        let options = IndexerOptions::new().with_source_layout_indexes(true);
        let indexer = Indexer::new_with_options(source, &parsed, options);

        assert_eq!(
            indexer.line_index().raw_continuation_backslashes(),
            &[offset_of(source, "\\")]
        );
        assert_eq!(indexer.region_index().heredocs().len(), 1);
    }

    #[test]
    fn round_trips_parser_output_into_regions_comments_and_lines() {
        let source = "\
#!/bin/bash
echo \"$(printf '%s' \"$name\")\" # inline
cat <<'EOF'
literal $body
EOF
";
        let indexer = index(source);
        let regions = indexer.region_index();

        assert_eq!(indexer.line_index().line_count(), 6);
        assert_eq!(indexer.comment_index().comments().len(), 2);

        // The heredoc body is reported as a heredoc region and as quoted.
        let heredoc_offset = offset_of(source, "literal $body");
        assert_eq!(regions.region_at(heredoc_offset), Some(RegionKind::Heredoc));
        assert!(regions.is_quoted(heredoc_offset));

        // `$name` is inside the inner double-quoted string.
        let name_offset = offset_of(source, "$name");
        assert_eq!(
            regions.region_at(name_offset),
            Some(RegionKind::DoubleQuoted)
        );
    }
}