libgraphql_parser/source_position.rs
1use crate::ast::AstPos;
2
/// Source position information for parsing, with dual column tracking.
///
/// This is a pure data struct with no mutation methods. Lexers are responsible
/// for computing position values as they scan input.
///
/// This is standalone with no dependency on libgraphql-core.
/// All fields are private with accessor methods.
///
/// Every field is a plain integer (or an `Option` of one), so the type derives
/// `Copy` (positions can be passed around by value without `.clone()`) and
/// `Hash` (positions can be used as map/set keys, consistently with `Eq`).
///
/// # Indexing Convention
///
/// **All position values are 0-based:**
/// - `line`: 0 = first line of the document (0-based)
/// - `col_utf8`: UTF-8 character count within the current line (0-based)
/// - `col_utf16`: Optional UTF-16 code unit offset within the current line
///   (0-based)
/// - `byte_offset`: byte offset within the whole document (0-based)
///
/// # Dual Column Tracking
///
/// Two column representations are supported:
/// - **`col_utf8`** (always available): Number of UTF-8 characters from the
///   start of the current line. Increments by 1 for each character regardless
///   of its byte representation. This is intuitive for users and matches what
///   most text editors display as "column".
/// - **`col_utf16`** (optional): UTF-16 code unit offset within the line. This
///   aligns with LSP (Language Server Protocol) and many editors. It is `Some`
///   when the token source can provide it (e.g. `StrToGraphQLTokenSource`),
///   and `None` when it cannot (e.g. `RustMacroGraphQLTokenSource` in
///   `libgraphql-macros` which uses `proc_macro2::Span` that only provides
///   UTF-8 char-based positions).
///
/// For ASCII text, both columns are equal. For text containing characters
/// outside the Basic Multilingual Plane (e.g., emoji), they differ:
/// - `col_utf8` advances by 1 for each UTF-8 character
/// - `col_utf16` advances by the character's UTF-16 length (1 or 2 code units)
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct SourcePosition {
    /// Line number (0-based: first line is 0)
    line: usize,

    /// UTF-8 character count within current line (0-based: first position is 0)
    col_utf8: usize,

    /// UTF-16 code unit offset within current line (0-based), if available.
    /// `None` when the token source cannot provide UTF-16 column information.
    col_utf16: Option<usize>,

    /// Byte offset from start of document (0-based: first byte is 0)
    byte_offset: usize,
}
53
54impl SourcePosition {
55 /// Create a new SourcePosition.
56 ///
57 /// # Arguments
58 /// - `line`: 0-based line number (0 = first line)
59 /// - `col_utf8`: 0-based UTF-8 character count within current line
60 /// - `col_utf16`: 0-based UTF-16 code unit offset within current line,
61 /// or `None` if not available (e.g., from `proc_macro2::Span`)
62 /// - `byte_offset`: 0-based byte offset from document start
63 pub fn new(
64 line: usize,
65 col_utf8: usize,
66 col_utf16: Option<usize>,
67 byte_offset: usize,
68 ) -> Self {
69 Self {
70 line,
71 col_utf8,
72 col_utf16,
73 byte_offset,
74 }
75 }
76
77 /// Returns the 0-based line number.
78 pub fn line(&self) -> usize {
79 self.line
80 }
81
82 /// Returns the 0-based (UTF-8) character count within the current line.
83 ///
84 /// This increments by 1 for each character regardless of byte
85 /// representation. For example, both 'a' (1 byte) and '🎉' (4 bytes) each
86 /// add 1 to this count.
87 pub fn col_utf8(&self) -> usize {
88 self.col_utf8
89 }
90
91 /// Returns the 0-based UTF-16 code unit offset within the current line,
92 /// if available.
93 ///
94 /// This is `Some` when the token source can provide UTF-16 column
95 /// information (e.g., `StrToGraphQLTokenSource`), and `None` when it
96 /// cannot (e.g., `RustMacroGraphQLTokenSource` in `libgraphql-macros`).
97 ///
98 /// For example, 'a' (1 UTF-16 code unit) adds 1 to this count, while '🎉'
99 /// (a surrogate pair requiring 2 UTF-16 code units) adds 2 to this count.
100 ///
101 /// For LSP compatibility, prefer this method when available.
102 pub fn col_utf16(&self) -> Option<usize> {
103 self.col_utf16
104 }
105
106 /// Returns the 0-based byte offset from document start.
107 pub fn byte_offset(&self) -> usize {
108 self.byte_offset
109 }
110
111 /// Convert to an `AstPos` for compatibility with `graphql_parser` types.
112 ///
113 /// Note: `AstPos` uses 1-based line and column numbers, so this method
114 /// adds 1 to both. The column is always derived from `col_utf8`.
115 pub fn to_ast_pos(&self) -> AstPos {
116 AstPos {
117 line: self.line + 1,
118 column: self.col_utf8 + 1,
119 }
120 }
121}