gram_codec/lib.rs
1//! # Gram Codec
2//!
3//! Bidirectional codec between Gram notation (human-readable text format) and Pattern data structures.
4//!
5//! This crate provides:
6//! - **Parsing**: Transform Gram notation text into Pattern structures
7//! - **Serialization**: Transform Pattern structures into valid Gram notation
8//!
9//! ## Features
10//!
11//! - Full support for all Gram syntax forms (nodes, relationships, subject patterns, annotations)
12//! - Round-trip correctness (parse → serialize → parse produces equivalent pattern)
13//! - Error recovery (reports all syntax errors, not just the first)
14//! - Multi-platform support (native Rust, WebAssembly, Python)
15//!
16//! ## Example Usage
17//!
18//! ```rust,no_run
19//! use gram_codec::{parse_gram_notation, to_gram_pattern};
20//!
21//! // Parse gram notation into patterns
22//! let gram_text = "(alice:Person {name: \"Alice\"})-[:KNOWS]->(bob:Person {name: \"Bob\"})";
23//! let patterns = parse_gram_notation(gram_text)?;
24//!
25//! // Serialize patterns back to gram notation
26//! for pattern in &patterns {
27//! let output = to_gram_pattern(pattern)?;
28//! println!("{}", output);
29//! }
30//! # Ok::<(), Box<dyn std::error::Error>>(())
31//! ```
32//!
33//! ## Grammar Authority
34//!
35//! This codec uses [`tree-sitter-gram`](https://github.com/gram-data/tree-sitter-gram) as the
36//! authoritative grammar specification. The parser implementation is pure Rust using nom parser
37//! combinators, validated for 100% conformance with the tree-sitter-gram test corpus.
38
39// Module declarations
40pub mod ast;
41mod error;
42mod serializer;
43mod value;
44
// TODO: The old tree-sitter-based parser modules below are temporarily commented out
// during the migration to the nom parser; they will be replaced.
// mod parser;
// pub(crate) mod transform;
49
50// New nom-based parser module (under development)
51mod parser;
52
53// Optional platform-specific modules
54#[cfg(feature = "wasm")]
55mod wasm;
56
57#[cfg(feature = "python")]
58mod python;
59
60// Public API exports
61pub use ast::{AstPattern, AstSubject};
62pub use error::{Location, SerializeError};
63// Use the new nom-based ParseError from the parser module
64pub use parser::ParseError;
65pub use serializer::{to_gram, to_gram_pattern, to_gram_with_header};
66pub use value::Value;
67
68// Re-export Pattern and Subject from pattern-core for convenience
69pub use pattern_core::{Pattern, PropertyRecord as Record, Subject};
70
71// --- New nom-based parser API ---
72
73/// Parse gram notation text into a collection of Pattern structures.
74///
75/// This is the foundational parser for gram notation. It returns all top-level elements,
76/// including any leading record (which appears as a bare pattern with properties but
77/// no identity, labels, or elements).
78///
79/// # Arguments
80///
81/// * `input` - Gram notation text to parse
82///
83/// # Returns
84///
85/// * `Ok(Vec<Pattern<Subject>>)` - Successfully parsed patterns
86/// * `Err(ParseError)` - Parse error with location information
87pub fn parse_gram(input: &str) -> Result<Vec<Pattern<Subject>>, ParseError> {
88 // Handle empty/whitespace-only input
89 if input.trim().is_empty() {
90 return Ok(vec![]);
91 }
92
93 // Parse using nom parser
94 match parser::gram_patterns(input) {
95 Ok((remaining, patterns)) => {
96 // Check if all input was consumed
97 if !remaining.trim().is_empty() {
98 let offset = input.len() - remaining.len();
99 let location = parser::Location::from_offset(input, offset);
100 return Err(ParseError::UnexpectedInput {
101 location,
102 snippet: remaining.chars().take(20).collect(),
103 });
104 }
105 Ok(patterns)
106 }
107 Err(e) => Err(parser::ParseError::from_nom_error(input, e)),
108 }
109}
110
111/// Parse gram notation, separating an optional header record from the patterns.
112///
113/// If the first element is a bare record (identity and labels are empty, and it has no elements),
114/// it is returned separately as the header.
115///
116/// # Arguments
117///
118/// * `input` - Gram notation text to parse
119///
120/// # Returns
121///
122/// * `Ok((Option<Record>, Vec<Pattern<Subject>>))` - Successfully parsed header and patterns
123/// * `Err(ParseError)` - If parsing fails
124pub fn parse_gram_with_header(
125 input: &str,
126) -> Result<(Option<Record>, Vec<Pattern<Subject>>), ParseError> {
127 let mut patterns = parse_gram(input)?;
128
129 if patterns.is_empty() {
130 return Ok((None, vec![]));
131 }
132
133 // Check if the first pattern is a bare record
134 let first = &patterns[0];
135 if first.value.identity.0.is_empty()
136 && first.value.labels.is_empty()
137 && first.elements.is_empty()
138 && !first.value.properties.is_empty()
139 {
140 let header_record = patterns.remove(0).value.properties;
141 Ok((Some(header_record), patterns))
142 } else {
143 Ok((None, patterns))
144 }
145}
146
147/// Parse gram notation to AST (Abstract Syntax Tree).
148///
149/// Returns a single AstPattern representing the file-level pattern.
150/// This is the **recommended output format** for cross-language consumption.
151///
152/// # Why AST?
153///
154/// - **Language-agnostic**: Pure JSON, works everywhere.
155/// - **Complete**: No information loss.
156/// - **Simple**: Just patterns and subjects (no graph concepts).
157///
158/// # Arguments
159///
160/// * `input` - Gram notation text to parse
161///
162/// # Returns
163///
164/// * `Ok(AstPattern)` - The parsed pattern as AST
165/// * `Err(ParseError)` - If parsing fails
166pub fn parse_to_ast(input: &str) -> Result<AstPattern, ParseError> {
167 let patterns = parse_gram(input)?;
168
169 if patterns.is_empty() {
170 return Ok(AstPattern::empty());
171 }
172
173 // Maintain "single file-level pattern" contract for AST
174 // If there's exactly one pattern and it's not a bare record, return it.
175 // Otherwise, wrap everything in a file-level pattern.
176 let document_pattern = wrap_as_document(patterns);
177 Ok(AstPattern::from_pattern(&document_pattern))
178}
179
180/// Internal helper to wrap multiple patterns into a single document-level pattern.
181fn wrap_as_document(mut patterns: Vec<Pattern<Subject>>) -> Pattern<Subject> {
182 if patterns.len() == 1 {
183 let first = &patterns[0];
184 // If it's a "real" pattern (has identity or labels or elements), return it.
185 // Also return it if it has properties but no other fields (a bare record),
186 // because as a single pattern it represents the whole document.
187 if !first.value.identity.0.is_empty()
188 || !first.value.labels.is_empty()
189 || !first.elements.is_empty()
190 || !first.value.properties.is_empty()
191 {
192 return patterns.remove(0);
193 }
194 }
195
196 // Otherwise wrap everything (including the bare record if present)
197 // Actually, if the first is a bare record, it becomes the document's properties
198 let mut properties = Record::new();
199 if !patterns.is_empty() {
200 let first = &patterns[0];
201 if first.value.identity.0.is_empty()
202 && first.value.labels.is_empty()
203 && first.elements.is_empty()
204 && !first.value.properties.is_empty()
205 {
206 properties = patterns.remove(0).value.properties;
207 }
208 }
209
210 let subject = Subject {
211 identity: pattern_core::Symbol(String::new()),
212 labels: std::collections::HashSet::new(),
213 properties,
214 };
215 Pattern::pattern(subject, patterns)
216}
217
218/// Validate gram notation syntax without constructing patterns.
219pub fn validate_gram(input: &str) -> Result<(), ParseError> {
220 parse_gram(input).map(|_| ())
221}
222
223/// Parse a single Gram pattern from text.
224pub fn parse_single_pattern(input: &str) -> Result<Pattern<Subject>, ParseError> {
225 let patterns = parse_gram(input)?;
226
227 match patterns.len() {
228 0 => Err(ParseError::UnexpectedInput {
229 location: parser::Location::start(),
230 snippet: "Input contains no patterns".to_string(),
231 }),
232 1 => Ok(patterns.into_iter().next().unwrap()),
233 n => Err(ParseError::UnexpectedInput {
234 location: parser::Location::start(),
235 snippet: format!("Input contains {} patterns, expected exactly 1", n),
236 }),
237 }
238}
239
240// Backward compatibility aliases
241pub use parse_gram as parse_gram_notation;