// nika_core/source/registry.rs

//! Multi-file source registry for tracking included files.
//!
//! The `SourceRegistry` maintains a collection of source files and provides
//! utilities for converting byte offsets to line:col positions for error
//! reporting.

use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;

use super::span::{ByteOffset, FileId, Span};

13/// A single source file with its content and path.
14#[derive(Debug)]
15pub struct SourceFile {
16    /// Unique identifier for this file.
17    pub id: FileId,
18    /// Path to the source file.
19    pub path: PathBuf,
20    /// Source content (shared for efficiency).
21    pub content: Arc<String>,
22    /// Line start byte offsets for fast line:col lookup.
23    /// line_starts[i] is the byte offset where line (i+1) starts.
24    line_starts: Vec<u32>,
25}
26
27impl SourceFile {
28    /// Create a new source file.
29    pub fn new(id: FileId, path: PathBuf, content: String) -> Self {
30        let line_starts = Self::compute_line_starts(&content);
31
32        Self {
33            id,
34            path,
35            content: Arc::new(content),
36            line_starts,
37        }
38    }
39
40    /// Compute line start offsets for a source string.
41    fn compute_line_starts(content: &str) -> Vec<u32> {
42        std::iter::once(0)
43            .chain(content.match_indices('\n').map(|(i, _)| i as u32 + 1))
44            .collect()
45    }
46
47    /// Get the number of lines in the file.
48    pub fn line_count(&self) -> usize {
49        self.line_starts.len()
50    }
51
52    /// Convert a byte offset to (line, column), both 1-indexed.
53    ///
54    /// Returns (1, 1) for offset 0, and so on.
55    pub fn offset_to_line_col(&self, offset: ByteOffset) -> (usize, usize) {
56        let offset = offset.as_usize();
57
58        // Binary search for the line containing this offset
59        let line = self
60            .line_starts
61            .partition_point(|&start| (start as usize) <= offset)
62            .saturating_sub(1);
63
64        let line_start = self.line_starts.get(line).copied().unwrap_or(0) as usize;
65        let column = offset.saturating_sub(line_start);
66
67        (line + 1, column + 1) // 1-indexed
68    }
69
70    /// Convert (line, column) to byte offset, both 1-indexed.
71    ///
72    /// Returns None if the line/column is out of bounds.
73    pub fn line_col_to_offset(&self, line: usize, column: usize) -> Option<ByteOffset> {
74        if line == 0 || column == 0 {
75            return None;
76        }
77
78        let line_start = self.line_starts.get(line - 1)?;
79        let offset = *line_start as usize + column - 1;
80
81        if offset <= self.content.len() {
82            Some(ByteOffset::from(offset))
83        } else {
84            None
85        }
86    }
87
88    /// Get the text at a span.
89    ///
90    /// Returns an empty string if the span is out of bounds.
91    pub fn text_at(&self, span: Span) -> &str {
92        let range = span.range();
93        if range.end <= self.content.len() {
94            &self.content[range]
95        } else {
96            ""
97        }
98    }
99
100    /// Get the text of a specific line (1-indexed).
101    ///
102    /// Returns the line without the trailing newline.
103    pub fn line_text(&self, line: usize) -> Option<&str> {
104        if line == 0 || line > self.line_starts.len() {
105            return None;
106        }
107
108        let start = self.line_starts[line - 1] as usize;
109        let end = self
110            .line_starts
111            .get(line)
112            .map(|&e| e as usize)
113            .unwrap_or(self.content.len());
114
115        Some(self.content[start..end].trim_end_matches('\n'))
116    }
117
118    /// Get a snippet of source code around a span for error display.
119    ///
120    /// Returns up to `context_lines` lines before and after the span.
121    pub fn snippet(&self, span: Span, context_lines: usize) -> SourceSnippet {
122        let (start_line, start_col) = self.offset_to_line_col(span.start);
123        let (end_line, end_col) = self.offset_to_line_col(span.end);
124
125        let first_line = start_line.saturating_sub(context_lines).max(1);
126        let last_line = (end_line + context_lines).min(self.line_count());
127
128        let lines: Vec<(usize, String)> = (first_line..=last_line)
129            .filter_map(|n| self.line_text(n).map(|text| (n, text.to_string())))
130            .collect();
131
132        SourceSnippet {
133            path: self.path.clone(),
134            lines,
135            highlight_start: (start_line, start_col),
136            highlight_end: (end_line, end_col),
137        }
138    }
139}
140
/// A snippet of source code for error display.
///
/// Line and column numbers are 1-indexed.
#[derive(Debug, Clone)]
pub struct SourceSnippet {
    /// Path to the source file.
    pub path: PathBuf,
    /// Lines of source code: (line_number, content).
    pub lines: Vec<(usize, String)>,
    /// Start of highlight region (line, column), 1-indexed.
    pub highlight_start: (usize, usize),
    /// End of highlight region (line, column), 1-indexed.
    pub highlight_end: (usize, usize),
}

154/// Registry of all source files in a compilation unit.
155///
156/// Tracks multiple files (main workflow + pkg: includes) and provides
157/// utilities for error reporting across files.
158#[derive(Debug, Default)]
159pub struct SourceRegistry {
160    /// All registered source files.
161    files: Vec<SourceFile>,
162    /// Map from path to file ID for deduplication.
163    path_to_id: HashMap<PathBuf, FileId>,
164    /// Next file ID to assign.
165    next_id: u32,
166}
167
168impl SourceRegistry {
169    /// Create a new empty source registry.
170    pub fn new() -> Self {
171        Self::default()
172    }
173
174    /// Add a source file and return its FileId.
175    ///
176    /// If the file was already added (same path), returns the existing FileId.
177    pub fn add_file(&mut self, path: impl AsRef<Path>, content: String) -> FileId {
178        let path = path.as_ref().to_path_buf();
179
180        // Check for existing file with same path
181        if let Some(&id) = self.path_to_id.get(&path) {
182            return id;
183        }
184
185        let id = FileId(self.next_id);
186        self.next_id += 1;
187
188        let file = SourceFile::new(id, path.clone(), content);
189        self.files.push(file);
190        self.path_to_id.insert(path, id);
191
192        id
193    }
194
195    /// Add a source file from a string (e.g., for tests).
196    pub fn add_string(&mut self, name: &str, content: String) -> FileId {
197        self.add_file(PathBuf::from(name), content)
198    }
199
200    /// Get a source file by ID.
201    pub fn get(&self, id: FileId) -> Option<&SourceFile> {
202        if id.is_dummy() {
203            return None;
204        }
205        self.files.get(id.0 as usize)
206    }
207
208    /// Get the file path for a FileId.
209    pub fn path(&self, id: FileId) -> Option<&Path> {
210        self.get(id).map(|f| f.path.as_path())
211    }
212
213    /// Get the source content for a FileId.
214    pub fn content(&self, id: FileId) -> Option<&str> {
215        self.get(id).map(|f| f.content.as_str())
216    }
217
218    /// Get the number of registered files.
219    pub fn file_count(&self) -> usize {
220        self.files.len()
221    }
222
223    /// Format a span as "path:line:col".
224    pub fn format_location(&self, span: Span) -> String {
225        if span.is_dummy() {
226            return "<unknown>".to_string();
227        }
228
229        if let Some(file) = self.get(span.file) {
230            let (line, col) = file.offset_to_line_col(span.start);
231            format!("{}:{}:{}", file.path.display(), line, col)
232        } else {
233            "<unknown>".to_string()
234        }
235    }
236
237    /// Format a span with just "line:col" (no path).
238    pub fn format_position(&self, span: Span) -> String {
239        if span.is_dummy() {
240            return "<unknown>".to_string();
241        }
242
243        if let Some(file) = self.get(span.file) {
244            let (line, col) = file.offset_to_line_col(span.start);
245            format!("{}:{}", line, col)
246        } else {
247            "<unknown>".to_string()
248        }
249    }
250
251    /// Get the text at a span.
252    pub fn text_at(&self, span: Span) -> &str {
253        if span.is_dummy() {
254            return "";
255        }
256
257        self.get(span.file).map(|f| f.text_at(span)).unwrap_or("")
258    }
259
260    /// Create a NamedSource for miette error reporting.
261    pub fn named_source(&self, id: FileId) -> Option<miette::NamedSource<String>> {
262        self.get(id)
263            .map(|f| miette::NamedSource::new(f.path.display().to_string(), f.content.to_string()))
264    }
265
266    /// Convert our Span to miette::SourceSpan.
267    pub fn to_miette_span(&self, span: Span) -> miette::SourceSpan {
268        miette::SourceSpan::new(
269            miette::SourceOffset::from(span.start.as_usize()),
270            span.len(),
271        )
272    }
273}
274
#[cfg(test)]
mod tests {
    use super::*;

    // Four newline-terminated lines; line 1 is 28 bytes long, so its '\n' sits
    // at offset 28 and line 2 starts at offset 29.
    const SAMPLE_SOURCE: &str =
        "schema: \"nika/workflow@0.12\"\nworkflow: test\ntasks:\n  - id: foo\n";

    #[test]
    fn test_source_file_line_col() {
        let file = SourceFile::new(
            FileId(0),
            PathBuf::from("test.yaml"),
            SAMPLE_SOURCE.to_string(),
        );

        // Line 1, column 1 (start of file)
        assert_eq!(file.offset_to_line_col(ByteOffset(0)), (1, 1));

        // Line 1, column 8 (the space after `schema:`)
        assert_eq!(file.offset_to_line_col(ByteOffset(7)), (1, 8));

        // Line 2, column 1 (after first newline at offset 28)
        assert_eq!(file.offset_to_line_col(ByteOffset(29)), (2, 1));
    }

    #[test]
    fn test_source_file_line_text() {
        let file = SourceFile::new(
            FileId(0),
            PathBuf::from("test.yaml"),
            SAMPLE_SOURCE.to_string(),
        );

        // Four terminated lines plus the empty line after the final '\n'
        assert_eq!(file.line_count(), 5);

        assert_eq!(file.line_text(1), Some("schema: \"nika/workflow@0.12\""));
        assert_eq!(file.line_text(2), Some("workflow: test"));
        assert_eq!(file.line_text(3), Some("tasks:"));
        assert_eq!(file.line_text(4), Some("  - id: foo"));
        // Line 5 exists (empty line after trailing \n)
        assert_eq!(file.line_text(5), Some(""));
        // Line 6 doesn't exist
        assert_eq!(file.line_text(6), None);
        // Line 0 is invalid (1-indexed)
        assert_eq!(file.line_text(0), None);
    }

    #[test]
    fn test_registry_add_file() {
        let mut registry = SourceRegistry::new();

        let id1 = registry.add_file("a.yaml", "content a".to_string());
        let id2 = registry.add_file("b.yaml", "content b".to_string());
        let id3 = registry.add_file("a.yaml", "ignored".to_string()); // duplicate

        assert_eq!(id1, FileId(0));
        assert_eq!(id2, FileId(1));
        assert_eq!(id3, id1); // Same file, same ID
        assert_eq!(registry.file_count(), 2);
        // The first registration wins; the duplicate's content is dropped.
        assert_eq!(registry.content(id1), Some("content a"));
    }

    #[test]
    fn test_registry_format_location() {
        let mut registry = SourceRegistry::new();
        let id = registry.add_file("workflow.yaml", SAMPLE_SOURCE.to_string());

        let span = Span::new(id, 29, 40);

        // Exact match: a prefix check would also accept e.g. "2:10".
        assert_eq!(registry.format_location(span), "workflow.yaml:2:1");
        assert_eq!(registry.format_position(span), "2:1");
    }

    #[test]
    fn test_registry_dummy_span() {
        let registry = SourceRegistry::new();
        let span = Span::dummy();

        assert_eq!(registry.format_location(span), "<unknown>");
        assert_eq!(registry.format_position(span), "<unknown>");
        assert_eq!(registry.text_at(span), "");
    }

    #[test]
    fn test_line_col_to_offset() {
        let file = SourceFile::new(
            FileId(0),
            PathBuf::from("test.yaml"),
            SAMPLE_SOURCE.to_string(),
        );

        // Line 1, column 1 -> offset 0
        assert_eq!(file.line_col_to_offset(1, 1), Some(ByteOffset(0)));

        // Line 2, column 1 -> offset 29 (after first line + newline)
        assert_eq!(file.line_col_to_offset(2, 1), Some(ByteOffset(29)));

        // Invalid inputs
        assert_eq!(file.line_col_to_offset(0, 1), None);
        assert_eq!(file.line_col_to_offset(1, 0), None);
        assert_eq!(file.line_col_to_offset(100, 1), None);
    }
}