Skip to main content

libgraphql_parser/
source_position.rs

1use crate::ast::AstPos;
2
/// Source position information for parsing, with dual column tracking.
///
/// This is a pure data struct with no mutation methods. Lexers are responsible
/// for computing position values as they scan input.
///
/// This is standalone with no dependency on libgraphql-core.
/// All fields are private with accessor methods.
///
/// Every field is a plain integer (or an `Option` of one), so the type is
/// `Copy` and `Hash`: positions can be passed around by value without an
/// explicit `.clone()` and can be used as keys in hashed collections.
///
/// # Indexing Convention
///
/// **All position values are 0-based:**
/// - `line`: 0 = first line of the document (0-based)
/// - `col_utf8`: UTF-8 character count within the current line (0-based)
/// - `col_utf16`: Optional UTF-16 code unit offset within the current line
///   (0-based)
/// - `byte_offset`: byte offset within the whole document (0-based)
///
/// # Dual Column Tracking
///
/// Two column representations are supported:
/// - **`col_utf8`** (always available): Number of UTF-8 characters from the
///   start of the current line. Increments by 1 for each character regardless
///   of its byte representation. This is intuitive for users and matches what
///   most text editors display as "column".
/// - **`col_utf16`** (optional): UTF-16 code unit offset within the line. This
///   aligns with LSP (Language Server Protocol) and many editors. It is `Some`
///   when the token source can provide it (e.g. `StrToGraphQLTokenSource`),
///   and `None` when it cannot (e.g. `RustMacroGraphQLTokenSource` in
///   `libgraphql-macros` which uses `proc_macro2::Span` that only provides
///   UTF-8 char-based positions).
///
/// For text made up only of characters inside the Basic Multilingual Plane
/// (which includes all ASCII text), both columns are equal. For text
/// containing characters outside the Basic Multilingual Plane (e.g., emoji),
/// they differ:
/// - `col_utf8` advances by 1 for each UTF-8 character
/// - `col_utf16` advances by the character's UTF-16 length (1 or 2 code units)
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct SourcePosition {
    /// Line number (0-based: first line is 0)
    line: usize,

    /// UTF-8 character count within current line (0-based: first position is 0)
    col_utf8: usize,

    /// UTF-16 code unit offset within current line (0-based), if available.
    /// `None` when the token source cannot provide UTF-16 column information.
    col_utf16: Option<usize>,

    /// Byte offset from start of document (0-based: first byte is 0)
    byte_offset: usize,
}
53
54impl SourcePosition {
55    /// Create a new SourcePosition.
56    ///
57    /// # Arguments
58    /// - `line`: 0-based line number (0 = first line)
59    /// - `col_utf8`: 0-based UTF-8 character count within current line
60    /// - `col_utf16`: 0-based UTF-16 code unit offset within current line,
61    ///   or `None` if not available (e.g., from `proc_macro2::Span`)
62    /// - `byte_offset`: 0-based byte offset from document start
63    pub fn new(
64        line: usize,
65        col_utf8: usize,
66        col_utf16: Option<usize>,
67        byte_offset: usize,
68    ) -> Self {
69        Self {
70            line,
71            col_utf8,
72            col_utf16,
73            byte_offset,
74        }
75    }
76
77    /// Returns the 0-based line number.
78    pub fn line(&self) -> usize {
79        self.line
80    }
81
82    /// Returns the 0-based (UTF-8) character count within the current line.
83    ///
84    /// This increments by 1 for each character regardless of byte
85    /// representation. For example, both 'a' (1 byte) and '🎉' (4 bytes) each
86    /// add 1 to this count.
87    pub fn col_utf8(&self) -> usize {
88        self.col_utf8
89    }
90
91    /// Returns the 0-based UTF-16 code unit offset within the current line,
92    /// if available.
93    ///
94    /// This is `Some` when the token source can provide UTF-16 column
95    /// information (e.g., `StrToGraphQLTokenSource`), and `None` when it
96    /// cannot (e.g., `RustMacroGraphQLTokenSource` in `libgraphql-macros`).
97    ///
98    /// For example, 'a' (1 UTF-16 code unit) adds 1 to this count, while '🎉'
99    /// (a surrogate pair requiring 2 UTF-16 code units) adds 2 to this count.
100    ///
101    /// For LSP compatibility, prefer this method when available.
102    pub fn col_utf16(&self) -> Option<usize> {
103        self.col_utf16
104    }
105
106    /// Returns the 0-based byte offset from document start.
107    pub fn byte_offset(&self) -> usize {
108        self.byte_offset
109    }
110
111    /// Convert to an `AstPos` for compatibility with `graphql_parser` types.
112    ///
113    /// Note: `AstPos` uses 1-based line and column numbers, so this method
114    /// adds 1 to both. The column is always derived from `col_utf8`.
115    pub fn to_ast_pos(&self) -> AstPos {
116        AstPos {
117            line: self.line + 1,
118            column: self.col_utf8 + 1,
119        }
120    }
121}