// libgraphql_parser/source_position.rs
/// Source position information for parsing, with dual column tracking.
///
/// This is a pure data struct with no mutation methods. Lexers are responsible
/// for computing position values as they scan input.
///
/// This is standalone with no dependency on libgraphql-core.
/// All fields are private with accessor methods.
///
/// # Indexing Convention
///
/// **All position values are 0-based:**
/// - `line`: 0 = first line of the document (0-based)
/// - `col_utf8`: UTF-8 character count within the current line (0-based)
/// - `col_utf16`: Optional UTF-16 code unit offset within the current line
///   (0-based)
/// - `byte_offset`: byte offset within the whole document (0-based)
///
/// # Dual Column Tracking
///
/// Two column representations are supported:
/// - **`col_utf8`** (always available): Number of UTF-8 characters from the
///   start of the current line. Increments by 1 for each character regardless
///   of its byte representation. This is intuitive for users and matches what
///   most text editors display as "column".
/// - **`col_utf16`** (optional): UTF-16 code unit offset within the line. This
///   aligns with LSP (Language Server Protocol) and many editors. It is `Some`
///   when the token source can provide it (e.g. `StrToGraphQLTokenSource`),
///   and `None` when it cannot (e.g. `RustMacroGraphQLTokenSource` in
///   `libgraphql-macros` which uses `proc_macro2::Span` that only provides
///   UTF-8 char-based positions).
///
/// For ASCII text, both columns are equal. For text containing characters
/// outside the Basic Multilingual Plane (e.g., emoji), they differ:
/// - `col_utf8` advances by 1 for each UTF-8 character
/// - `col_utf16` advances by the character's UTF-16 length (1 or 2 code units)
///
/// # Size
///
/// This struct is 20 bytes (3 × `u32` plus one 8-byte `Option<u32>`) and
/// derives `Copy` for cheap value semantics. The `u32` fields support files
/// up to 4 GB and lines/columns up to ~4 billion — more than sufficient for
/// any realistic GraphQL document. Accessors return `usize` for ergonomic
/// interop with Rust's standard indexing types.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct SourcePosition {
    /// Line number (0-based: first line is 0)
    line: u32,

    /// UTF-8 character count within current line (0-based: first position is 0)
    col_utf8: u32,

    /// UTF-16 code unit offset within current line (0-based), if available.
    /// `None` when the token source cannot provide UTF-16 column information.
    col_utf16: Option<u32>,

    /// Byte offset from start of document (0-based: first byte is 0)
    byte_offset: u32,
}
59
60impl SourcePosition {
61 /// Create a new SourcePosition.
62 ///
63 /// # Arguments
64 /// - `line`: 0-based line number (0 = first line)
65 /// - `col_utf8`: 0-based UTF-8 character count within current line
66 /// - `col_utf16`: 0-based UTF-16 code unit offset within current line,
67 /// or `None` if not available (e.g., from `proc_macro2::Span`)
68 /// - `byte_offset`: 0-based byte offset from document start
69 pub fn new(
70 line: usize,
71 col_utf8: usize,
72 col_utf16: Option<usize>,
73 byte_offset: usize,
74 ) -> Self {
75 debug_assert!(line <= u32::MAX as usize, "line overflows u32: {line}");
76 debug_assert!(col_utf8 <= u32::MAX as usize, "col_utf8 overflows u32: {col_utf8}");
77 debug_assert!(byte_offset <= u32::MAX as usize, "byte_offset overflows u32: {byte_offset}");
78 if let Some(c) = col_utf16 {
79 debug_assert!(c <= u32::MAX as usize, "col_utf16 overflows u32: {c}");
80 }
81 Self {
82 line: line as u32,
83 col_utf8: col_utf8 as u32,
84 col_utf16: col_utf16.map(|c| c as u32),
85 byte_offset: byte_offset as u32,
86 }
87 }
88
89 /// Returns the 0-based line number.
90 pub fn line(&self) -> usize {
91 self.line as usize
92 }
93
94 /// Returns the 0-based `(line, col_utf8)` tuple as `u32`
95 /// values.
96 ///
97 /// This is a low-level accessor that returns the raw stored
98 /// `u32` fields without converting to `usize`. Useful for
99 /// compact representations (e.g. serialization, span maps).
100 pub fn line_col(&self) -> (u32, u32) {
101 (self.line, self.col_utf8)
102 }
103
104 /// Returns the 0-based (UTF-8) character count within the current line.
105 ///
106 /// This increments by 1 for each character regardless of byte
107 /// representation. For example, both 'a' (1 byte) and '🎉' (4 bytes) each
108 /// add 1 to this count.
109 pub fn col_utf8(&self) -> usize {
110 self.col_utf8 as usize
111 }
112
113 /// Returns the 0-based UTF-16 code unit offset within the current line,
114 /// if available.
115 ///
116 /// This is `Some` when the token source can provide UTF-16 column
117 /// information (e.g., `StrToGraphQLTokenSource`), and `None` when it
118 /// cannot (e.g., `RustMacroGraphQLTokenSource` in `libgraphql-macros`).
119 ///
120 /// For example, 'a' (1 UTF-16 code unit) adds 1 to this count, while '🎉'
121 /// (a surrogate pair requiring 2 UTF-16 code units) adds 2 to this count.
122 ///
123 /// For LSP compatibility, prefer this method when available.
124 pub fn col_utf16(&self) -> Option<usize> {
125 self.col_utf16.map(|c| c as usize)
126 }
127
128 /// Returns the 0-based byte offset from document start.
129 pub fn byte_offset(&self) -> usize {
130 self.byte_offset as usize
131 }
132}