Skip to main content

mirage/cfg/
source.rs

//! Source location mapping for CFG blocks
2
3use serde::{Deserialize, Serialize};
4use std::path::PathBuf;
5
6/// Source location for a CFG block or statement
7#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
8pub struct SourceLocation {
9    /// File path (relative to crate root or absolute)
10    pub file_path: PathBuf,
11    /// Byte offset of the start
12    pub byte_start: usize,
13    /// Byte offset of the end
14    pub byte_end: usize,
15    /// Line number (1-indexed)
16    pub start_line: usize,
17    /// Column number (1-indexed, in UTF-8 characters)
18    pub start_column: usize,
19    /// End line number
20    pub end_line: usize,
21    /// End column number
22    pub end_column: usize,
23}
24
25impl SourceLocation {
26    /// Create a new source location
27    pub fn new(
28        file_path: impl Into<PathBuf>,
29        byte_start: usize,
30        byte_end: usize,
31        start_line: usize,
32        start_column: usize,
33        end_line: usize,
34        end_column: usize,
35    ) -> Self {
36        Self {
37            file_path: file_path.into(),
38            byte_start,
39            byte_end,
40            start_line,
41            start_column,
42            end_line,
43            end_column,
44        }
45    }
46
47    /// Convert byte offsets to line/column
48    ///
49    /// This is a simplified implementation. For production use,
50    /// you'd want to cache line endings and handle edge cases.
51    pub fn from_bytes(
52        file_path: impl Into<PathBuf>,
53        source: &str,
54        byte_start: usize,
55        byte_end: usize,
56    ) -> Self {
57        let (start_line, start_col) = byte_to_line_column(source, byte_start);
58        let (end_line, end_col) = byte_to_line_column(source, byte_end);
59
60        Self {
61            file_path: file_path.into(),
62            byte_start,
63            byte_end,
64            start_line,
65            start_column: start_col,
66            end_line,
67            end_column: end_col,
68        }
69    }
70
71    /// Get a human-readable description
72    pub fn display(&self) -> String {
73        format!(
74            "{}:{}:{}-{}:{}",
75            self.file_path.display(),
76            self.start_line,
77            self.start_column,
78            self.end_line,
79            self.end_column
80        )
81    }
82
83    /// Check if this location overlaps with another
84    pub fn overlaps(&self, other: &SourceLocation) -> bool {
85        if self.file_path != other.file_path {
86            return false;
87        }
88        // Overlap if ranges intersect
89        self.byte_start < other.byte_end && self.byte_end > other.byte_start
90    }
91
92    /// Create a source location from byte ranges, with optional source for line/column.
93    ///
94    /// If source is provided, computes line/column from byte offsets.
95    /// If source is None, line/column fields are set to 0 (lazy computation).
96    ///
97    /// This is useful when reconstructing SourceLocation from database where
98    /// the source file may not be available.
99    pub fn from_bytes_with_source(
100        file_path: impl Into<PathBuf>,
101        source: Option<&str>,
102        byte_start: usize,
103        byte_end: usize,
104    ) -> Self {
105        let file_path = file_path.into();
106
107        if let Some(src) = source {
108            // Compute line/column from source
109            let (start_line, start_col) = byte_to_line_column(src, byte_start);
110            let (end_line, end_col) = byte_to_line_column(src, byte_end);
111            Self {
112                file_path,
113                byte_start,
114                byte_end,
115                start_line,
116                start_column: start_col,
117                end_line,
118                end_column: end_col,
119            }
120        } else {
121            // No source available - line/column will be 0
122            // Display will fall back to byte ranges
123            Self {
124                file_path,
125                byte_start,
126                byte_end,
127                start_line: 0,
128                start_column: 0,
129                end_line: 0,
130                end_column: 0,
131            }
132        }
133    }
134
135    /// Get a human-readable description (fallback to byte ranges if line/column unavailable)
136    pub fn display_or_bytes(&self) -> String {
137        if self.start_line > 0 {
138            self.display()
139        } else {
140            format!(
141                "{}:bytes{}-{}",
142                self.file_path.display(),
143                self.byte_start,
144                self.byte_end
145            )
146        }
147    }
148}
149
/// Converts a byte offset within `source` into a 1-indexed `(line, column)` pair.
///
/// Lines are separated by `'\n'`. Columns are counted in characters (`char`s),
/// not bytes, and restart at 1 after every newline.
///
/// Edge cases:
/// - An offset of `0`, or any offset into an empty `source`, yields `(1, 1)`.
/// - An offset past the end of `source` yields the position just after the
///   last character.
/// - An offset that falls inside a multi-byte character yields the position of
///   that character (the character containing the byte), not of the one
///   following it.
fn byte_to_line_column(source: &str, byte_offset: usize) -> (usize, usize) {
    let mut line = 1;
    let mut column = 1;

    for (start, ch) in source.char_indices() {
        // Only characters that end at or before the offset lie wholly in front
        // of it. Checking the end (rather than the start) keeps an offset in
        // the middle of a multi-byte character on that character.
        if start + ch.len_utf8() > byte_offset {
            break;
        }

        if ch == '\n' {
            line += 1;
            column = 1;
        } else {
            column += 1;
        }
    }

    (line, column)
}
173
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a location literal from `(start, end)` byte offsets and
    /// `(line, column)` pairs, bypassing the constructors under test.
    fn loc(
        path: &str,
        bytes: (usize, usize),
        start: (usize, usize),
        end: (usize, usize),
    ) -> SourceLocation {
        SourceLocation {
            file_path: PathBuf::from(path),
            byte_start: bytes.0,
            byte_end: bytes.1,
            start_line: start.0,
            start_column: start.1,
            end_line: end.0,
            end_column: end.1,
        }
    }

    #[test]
    fn test_byte_to_line_column() {
        let source = "line 1\nline 2\nline 3";

        assert_eq!(byte_to_line_column(source, 0), (1, 1));
        assert_eq!(byte_to_line_column(source, 6), (1, 7));
        assert_eq!(byte_to_line_column(source, 7), (2, 1));
        assert_eq!(byte_to_line_column(source, 13), (2, 7));
        assert_eq!(byte_to_line_column(source, 14), (3, 1));
    }

    #[test]
    fn test_source_location_from_bytes() {
        let source = "hello\nworld";
        let loc = SourceLocation::from_bytes("test.rs", source, 0, 5);

        assert_eq!(loc.start_line, 1);
        assert_eq!(loc.start_column, 1);
        assert_eq!(loc.end_line, 1);
        assert_eq!(loc.end_column, 6);
    }

    #[test]
    fn test_source_location_display() {
        let location = loc("src/test.rs", (0, 10), (5, 3), (5, 13));

        assert_eq!(location.display(), "src/test.rs:5:3-5:13");
    }

    #[test]
    fn test_overlaps() {
        let first = loc("test.rs", (0, 10), (1, 1), (1, 11));
        let partially_overlapping = loc("test.rs", (5, 15), (1, 6), (1, 16));
        let other_file = loc("other.rs", (0, 10), (1, 1), (1, 11));

        assert!(first.overlaps(&partially_overlapping));
        // Identical byte range, but in a different file.
        assert!(!first.overlaps(&other_file));
    }

    #[test]
    fn test_source_location_new() {
        let loc = SourceLocation::new("path/to/file.rs", 100, 200, 10, 5, 15, 20);

        assert_eq!(loc.file_path, PathBuf::from("path/to/file.rs"));
        assert_eq!(loc.byte_start, 100);
        assert_eq!(loc.byte_end, 200);
        assert_eq!(loc.start_line, 10);
        assert_eq!(loc.start_column, 5);
        assert_eq!(loc.end_line, 15);
        assert_eq!(loc.end_column, 20);
    }

    #[test]
    fn test_multibyte_character_handling() {
        // Byte layout of "hello 世界\nworld":
        //   "hello " -> bytes 0..6   (columns 1-6)
        //   "世"     -> bytes 6..9   (column 7)
        //   "界"     -> bytes 9..12  (column 8)
        //   "\n"     -> byte 12      (column 9)
        //   "world"  -> from byte 13 (line 2, column 1)
        let source = "hello 世界\nworld";

        assert_eq!(byte_to_line_column(source, 0), (1, 1));

        // Six single-byte characters precede "世".
        assert_eq!(byte_to_line_column(source, 6), (1, 7));

        // Past a 3-byte character the column advances by one, not three.
        assert_eq!(byte_to_line_column(source, 9), (1, 8));
        assert_eq!(byte_to_line_column(source, 12), (1, 9));

        // First character after the newline.
        assert_eq!(byte_to_line_column(source, 13), (2, 1));
    }

    #[test]
    fn test_overlaps_adjacent_no_overlap() {
        // Ranges that only touch (one ends where the other begins) do not overlap.
        let left = loc("test.rs", (0, 10), (1, 1), (1, 11));
        let right = loc("test.rs", (10, 20), (1, 11), (1, 21));

        assert!(!left.overlaps(&right));
    }

    #[test]
    fn test_overlaps_contained() {
        // A range fully inside another overlaps it, in both directions.
        let outer = loc("test.rs", (0, 100), (1, 1), (5, 1));
        let inner = loc("test.rs", (20, 30), (2, 1), (2, 11));

        assert!(outer.overlaps(&inner));
        assert!(inner.overlaps(&outer));
    }

    #[test]
    fn test_from_bytes_with_source_with_source() {
        let source = "hello\nworld";
        let loc = SourceLocation::from_bytes_with_source("test.rs", Some(source), 0, 5);

        assert_eq!(loc.file_path, PathBuf::from("test.rs"));
        assert_eq!(loc.byte_start, 0);
        assert_eq!(loc.byte_end, 5);
        assert_eq!(loc.start_line, 1);
        assert_eq!(loc.start_column, 1);
        assert_eq!(loc.end_line, 1);
        assert_eq!(loc.end_column, 6);
    }

    #[test]
    fn test_from_bytes_with_source_without_source() {
        let loc = SourceLocation::from_bytes_with_source("test.rs", None, 10, 20);

        assert_eq!(loc.file_path, PathBuf::from("test.rs"));
        assert_eq!(loc.byte_start, 10);
        assert_eq!(loc.byte_end, 20);
        // Without source text every line/column field is 0.
        assert_eq!(loc.start_line, 0);
        assert_eq!(loc.start_column, 0);
        assert_eq!(loc.end_line, 0);
        assert_eq!(loc.end_column, 0);
    }

    #[test]
    fn test_display_or_bytes_with_line_column() {
        let location = loc("test.rs", (0, 10), (1, 1), (1, 11));

        assert_eq!(location.display_or_bytes(), "test.rs:1:1-1:11");
    }

    #[test]
    fn test_display_or_bytes_without_line_column() {
        let location = loc("test.rs", (100, 200), (0, 0), (0, 0));

        assert_eq!(location.display_or_bytes(), "test.rs:bytes100-200");
    }
}