Skip to main content

libgraphql_parser/
source_position.rs

/// Source position information for parsing, with dual column tracking.
///
/// This is a pure data struct with no mutation methods. Lexers are responsible
/// for computing position values as they scan input.
///
/// This is standalone with no dependency on libgraphql-core.
/// All fields are private with accessor methods.
///
/// # Indexing Convention
///
/// **All position values are 0-based:**
/// - `line`: 0 = first line of the document
/// - `col_utf8`: character count within the current line
/// - `col_utf16`: optional UTF-16 code unit offset within the current line
/// - `byte_offset`: byte offset within the whole document
///
/// # Dual Column Tracking
///
/// Two column representations are supported:
/// - **`col_utf8`** (always available): Number of characters from the start
///   of the current line. Increments by 1 for each character regardless of
///   how many bytes its UTF-8 encoding occupies. This is intuitive for users
///   and matches what most text editors display as "column".
/// - **`col_utf16`** (optional): UTF-16 code unit offset within the line. This
///   aligns with LSP (Language Server Protocol) and many editors. It is `Some`
///   when the token source can provide it (e.g. `StrToGraphQLTokenSource`),
///   and `None` when it cannot (e.g. `RustMacroGraphQLTokenSource` in
///   `libgraphql-macros` which uses `proc_macro2::Span` that only provides
///   char-based positions).
///
/// For text made up only of characters inside the Basic Multilingual Plane
/// (which includes all ASCII text), both columns are equal. For text
/// containing characters outside the Basic Multilingual Plane (e.g., emoji),
/// they differ:
/// - `col_utf8` advances by 1 for each character
/// - `col_utf16` advances by the character's UTF-16 length (1 or 2 code units)
///
/// # Size
///
/// This struct is 20 bytes (3 × `u32` = 12 bytes, plus an `Option<u32>` that
/// occupies 8 bytes) and derives `Copy` for cheap value semantics. The `u32`
/// fields support files up to 4 GiB and lines/columns up to ~4 billion — more
/// than sufficient for any realistic GraphQL document. Accessors return
/// `usize` for ergonomic interop with Rust's standard indexing types.
///
/// # Traits
///
/// Besides `Copy`/`Clone`/`Debug`, the struct derives `Eq` and `Hash`, so
/// positions can be compared and used as keys in hash-based collections.
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
pub struct SourcePosition {
    /// Line number (0-based: first line is 0).
    line: u32,

    /// Character count within the current line (0-based: first position is 0).
    col_utf8: u32,

    /// UTF-16 code unit offset within the current line (0-based), if available.
    /// `None` when the token source cannot provide UTF-16 column information.
    col_utf16: Option<u32>,

    /// Byte offset from the start of the document (0-based: first byte is 0).
    byte_offset: u32,
}
59
60impl SourcePosition {
61    /// Create a new SourcePosition.
62    ///
63    /// # Arguments
64    /// - `line`: 0-based line number (0 = first line)
65    /// - `col_utf8`: 0-based UTF-8 character count within current line
66    /// - `col_utf16`: 0-based UTF-16 code unit offset within current line,
67    ///   or `None` if not available (e.g., from `proc_macro2::Span`)
68    /// - `byte_offset`: 0-based byte offset from document start
69    pub fn new(
70        line: usize,
71        col_utf8: usize,
72        col_utf16: Option<usize>,
73        byte_offset: usize,
74    ) -> Self {
75        debug_assert!(line <= u32::MAX as usize, "line overflows u32: {line}");
76        debug_assert!(col_utf8 <= u32::MAX as usize, "col_utf8 overflows u32: {col_utf8}");
77        debug_assert!(byte_offset <= u32::MAX as usize, "byte_offset overflows u32: {byte_offset}");
78        if let Some(c) = col_utf16 {
79            debug_assert!(c <= u32::MAX as usize, "col_utf16 overflows u32: {c}");
80        }
81        Self {
82            line: line as u32,
83            col_utf8: col_utf8 as u32,
84            col_utf16: col_utf16.map(|c| c as u32),
85            byte_offset: byte_offset as u32,
86        }
87    }
88
89    /// Returns the 0-based line number.
90    pub fn line(&self) -> usize {
91        self.line as usize
92    }
93
94    /// Returns the 0-based `(line, col_utf8)` tuple as `u32`
95    /// values.
96    ///
97    /// This is a low-level accessor that returns the raw stored
98    /// `u32` fields without converting to `usize`. Useful for
99    /// compact representations (e.g. serialization, span maps).
100    pub fn line_col(&self) -> (u32, u32) {
101        (self.line, self.col_utf8)
102    }
103
104    /// Returns the 0-based (UTF-8) character count within the current line.
105    ///
106    /// This increments by 1 for each character regardless of byte
107    /// representation. For example, both 'a' (1 byte) and '🎉' (4 bytes) each
108    /// add 1 to this count.
109    pub fn col_utf8(&self) -> usize {
110        self.col_utf8 as usize
111    }
112
113    /// Returns the 0-based UTF-16 code unit offset within the current line,
114    /// if available.
115    ///
116    /// This is `Some` when the token source can provide UTF-16 column
117    /// information (e.g., `StrToGraphQLTokenSource`), and `None` when it
118    /// cannot (e.g., `RustMacroGraphQLTokenSource` in `libgraphql-macros`).
119    ///
120    /// For example, 'a' (1 UTF-16 code unit) adds 1 to this count, while '🎉'
121    /// (a surrogate pair requiring 2 UTF-16 code units) adds 2 to this count.
122    ///
123    /// For LSP compatibility, prefer this method when available.
124    pub fn col_utf16(&self) -> Option<usize> {
125        self.col_utf16.map(|c| c as usize)
126    }
127
128    /// Returns the 0-based byte offset from document start.
129    pub fn byte_offset(&self) -> usize {
130        self.byte_offset as usize
131    }
132}