gram_codec/lib.rs
1//! # Gram Codec
2//!
3//! Bidirectional codec between Gram notation (human-readable text format) and Pattern data structures.
4//!
5//! This crate provides:
6//! - **Parsing**: Transform Gram notation text into Pattern structures
7//! - **Serialization**: Transform Pattern structures into valid Gram notation
8//!
9//! ## Features
10//!
11//! - Full support for all Gram syntax forms (nodes, relationships, subject patterns, annotations)
12//! - Round-trip correctness (parse → serialize → parse produces equivalent pattern)
13//! - Error recovery (reports all syntax errors, not just the first)
14//! - Multi-platform support (native Rust, WebAssembly, Python)
15//!
16//! ## Example Usage
17//!
18//! ```rust,no_run
19//! use gram_codec::{parse_gram_notation, to_gram_pattern};
20//!
21//! // Parse gram notation into patterns
22//! let gram_text = "(alice:Person {name: \"Alice\"})-[:KNOWS]->(bob:Person {name: \"Bob\"})";
23//! let patterns = parse_gram_notation(gram_text)?;
24//!
25//! // Serialize patterns back to gram notation
26//! for pattern in &patterns {
27//! let output = to_gram_pattern(pattern)?;
28//! println!("{}", output);
29//! }
30//! # Ok::<(), Box<dyn std::error::Error>>(())
31//! ```
32//!
33//! ## Grammar Authority
34//!
35//! This codec uses [`tree-sitter-gram`](https://github.com/gram-data/tree-sitter-gram) as the
36//! authoritative grammar specification. The parser implementation is pure Rust using nom parser
37//! combinators, validated for 100% conformance with the tree-sitter-gram test corpus.
38
39// Module declarations
40pub mod ast;
41mod error;
42mod serializer;
43pub mod standard_graph;
44mod value;
45
46// TODO: Temporarily commented out during migration to nom parser
47// Old tree-sitter parser (will be replaced)
48// mod parser;
49// pub(crate) mod transform;
50
51// New nom-based parser module (under development)
52mod parser;
53
54// Optional platform-specific modules
55#[cfg(feature = "wasm")]
56mod wasm;
57
58#[cfg(feature = "python")]
59mod python;
60
61// Public API exports
62pub use ast::{AstPattern, AstSubject};
63pub use error::{Location, SerializeError};
64// Use the new nom-based ParseError from the parser module
65pub use parser::ParseError;
66pub use serializer::{to_gram, to_gram_pattern, to_gram_with_header};
67pub use value::Value;
68
69// Re-export Pattern and Subject from pattern-core for convenience
70pub use pattern_core::{Pattern, PropertyRecord as Record, Subject};
71
72// --- New nom-based parser API ---
73
74/// Parse gram notation text into a collection of Pattern structures.
75///
76/// This is the foundational parser for gram notation. It returns all top-level elements,
77/// including any leading record (which appears as a bare pattern with properties but
78/// no identity, labels, or elements).
79///
80/// # Arguments
81///
82/// * `input` - Gram notation text to parse
83///
84/// # Returns
85///
86/// * `Ok(Vec<Pattern<Subject>>)` - Successfully parsed patterns
87/// * `Err(ParseError)` - Parse error with location information
88pub fn parse_gram(input: &str) -> Result<Vec<Pattern<Subject>>, ParseError> {
89 // Handle empty/whitespace-only input
90 if input.trim().is_empty() {
91 return Ok(vec![]);
92 }
93
94 // Parse using nom parser
95 match parser::gram_patterns(input) {
96 Ok((remaining, patterns)) => {
97 // Check if all input was consumed
98 if !remaining.trim().is_empty() {
99 let offset = input.len() - remaining.len();
100 let location = parser::Location::from_offset(input, offset);
101 return Err(ParseError::UnexpectedInput {
102 location,
103 snippet: remaining.chars().take(20).collect(),
104 });
105 }
106 Ok(patterns)
107 }
108 Err(e) => Err(parser::ParseError::from_nom_error(input, e)),
109 }
110}
111
112/// Parse gram notation, separating an optional header record from the patterns.
113///
114/// If the first element is a bare record (identity and labels are empty, and it has no elements),
115/// it is returned separately as the header.
116///
117/// # Arguments
118///
119/// * `input` - Gram notation text to parse
120///
121/// # Returns
122///
123/// * `Ok((Option<Record>, Vec<Pattern<Subject>>))` - Successfully parsed header and patterns
124/// * `Err(ParseError)` - If parsing fails
125pub fn parse_gram_with_header(
126 input: &str,
127) -> Result<(Option<Record>, Vec<Pattern<Subject>>), ParseError> {
128 let mut patterns = parse_gram(input)?;
129
130 if patterns.is_empty() {
131 return Ok((None, vec![]));
132 }
133
134 // Check if the first pattern is a bare record
135 let first = &patterns[0];
136 if first.value.identity.0.is_empty()
137 && first.value.labels.is_empty()
138 && first.elements.is_empty()
139 && !first.value.properties.is_empty()
140 {
141 let header_record = patterns.remove(0).value.properties;
142 Ok((Some(header_record), patterns))
143 } else {
144 Ok((None, patterns))
145 }
146}
147
148/// Parse gram notation to AST (Abstract Syntax Tree).
149///
150/// Returns a single AstPattern representing the file-level pattern.
151/// This is the **recommended output format** for cross-language consumption.
152///
153/// # Why AST?
154///
155/// - **Language-agnostic**: Pure JSON, works everywhere.
156/// - **Complete**: No information loss.
157/// - **Simple**: Just patterns and subjects (no graph concepts).
158///
159/// # Arguments
160///
161/// * `input` - Gram notation text to parse
162///
163/// # Returns
164///
165/// * `Ok(AstPattern)` - The parsed pattern as AST
166/// * `Err(ParseError)` - If parsing fails
167pub fn parse_to_ast(input: &str) -> Result<AstPattern, ParseError> {
168 let patterns = parse_gram(input)?;
169
170 if patterns.is_empty() {
171 return Ok(AstPattern::empty());
172 }
173
174 // Maintain "single file-level pattern" contract for AST
175 // If there's exactly one pattern and it's not a bare record, return it.
176 // Otherwise, wrap everything in a file-level pattern.
177 let document_pattern = wrap_as_document(patterns);
178 Ok(AstPattern::from_pattern(&document_pattern))
179}
180
181/// Internal helper to wrap multiple patterns into a single document-level pattern.
182fn wrap_as_document(mut patterns: Vec<Pattern<Subject>>) -> Pattern<Subject> {
183 if patterns.len() == 1 {
184 let first = &patterns[0];
185 // If it's a "real" pattern (has identity or labels or elements), return it.
186 // Also return it if it has properties but no other fields (a bare record),
187 // because as a single pattern it represents the whole document.
188 if !first.value.identity.0.is_empty()
189 || !first.value.labels.is_empty()
190 || !first.elements.is_empty()
191 || !first.value.properties.is_empty()
192 {
193 return patterns.remove(0);
194 }
195 }
196
197 // Otherwise wrap everything (including the bare record if present)
198 // Actually, if the first is a bare record, it becomes the document's properties
199 let mut properties = Record::new();
200 if !patterns.is_empty() {
201 let first = &patterns[0];
202 if first.value.identity.0.is_empty()
203 && first.value.labels.is_empty()
204 && first.elements.is_empty()
205 && !first.value.properties.is_empty()
206 {
207 properties = patterns.remove(0).value.properties;
208 }
209 }
210
211 let subject = Subject {
212 identity: pattern_core::Symbol(String::new()),
213 labels: std::collections::HashSet::new(),
214 properties,
215 };
216 Pattern::pattern(subject, patterns)
217}
218
219/// Validate gram notation syntax without constructing patterns.
220pub fn validate_gram(input: &str) -> Result<(), ParseError> {
221 parse_gram(input).map(|_| ())
222}
223
224/// Parse a single Gram pattern from text.
225pub fn parse_single_pattern(input: &str) -> Result<Pattern<Subject>, ParseError> {
226 let patterns = parse_gram(input)?;
227
228 match patterns.len() {
229 0 => Err(ParseError::UnexpectedInput {
230 location: parser::Location::start(),
231 snippet: "Input contains no patterns".to_string(),
232 }),
233 1 => Ok(patterns.into_iter().next().unwrap()),
234 n => Err(ParseError::UnexpectedInput {
235 location: parser::Location::start(),
236 snippet: format!("Input contains {} patterns, expected exactly 1", n),
237 }),
238 }
239}
240
241// Backward compatibility aliases
242pub use parse_gram as parse_gram_notation;
243pub use standard_graph::FromGram;