Skip to main content

gram_codec/
lib.rs

1//! # Gram Codec
2//!
3//! Bidirectional codec between Gram notation (human-readable text format) and Pattern data structures.
4//!
5//! This crate provides:
6//! - **Parsing**: Transform Gram notation text into Pattern structures
7//! - **Serialization**: Transform Pattern structures into valid Gram notation
8//!
9//! ## Features
10//!
11//! - Full support for all Gram syntax forms (nodes, relationships, subject patterns, annotations)
12//! - Round-trip correctness (parse → serialize → parse produces equivalent pattern)
13//! - Error recovery (reports all syntax errors, not just the first)
14//! - Multi-platform support (native Rust, WebAssembly, Python)
15//!
16//! ## Example Usage
17//!
18//! ```rust,no_run
19//! use gram_codec::{parse_gram_notation, to_gram_pattern};
20//!
21//! // Parse gram notation into patterns
22//! let gram_text = "(alice:Person {name: \"Alice\"})-[:KNOWS]->(bob:Person {name: \"Bob\"})";
23//! let patterns = parse_gram_notation(gram_text)?;
24//!
25//! // Serialize patterns back to gram notation
26//! for pattern in &patterns {
27//!     let output = to_gram_pattern(pattern)?;
28//!     println!("{}", output);
29//! }
30//! # Ok::<(), Box<dyn std::error::Error>>(())
31//! ```
32//!
33//! ## Grammar Authority
34//!
35//! This codec uses [`tree-sitter-gram`](https://github.com/gram-data/tree-sitter-gram) as the
36//! authoritative grammar specification. The parser implementation is pure Rust using nom parser
37//! combinators, validated for 100% conformance with the tree-sitter-gram test corpus.
38
39// Module declarations
40pub mod ast;
41mod error;
42mod serializer;
43mod value;
44
45// TODO: Temporarily commented out during migration to nom parser
46// Old tree-sitter parser (will be replaced)
47// mod parser;
48// pub(crate) mod transform;
49
50// New nom-based parser module (under development)
51mod parser;
52
53// Optional platform-specific modules
54#[cfg(feature = "wasm")]
55mod wasm;
56
57#[cfg(feature = "python")]
58mod python;
59
60// Public API exports
61pub use ast::{AstPattern, AstSubject};
62pub use error::{Location, SerializeError};
63// Use the new nom-based ParseError from the parser module
64pub use parser::ParseError;
65pub use serializer::{to_gram, to_gram_pattern, to_gram_with_header};
66pub use value::Value;
67
68// Re-export Pattern and Subject from pattern-core for convenience
69pub use pattern_core::{Pattern, PropertyRecord as Record, Subject};
70
71// --- New nom-based parser API ---
72
73/// Parse gram notation text into a collection of Pattern structures.
74///
75/// This is the foundational parser for gram notation. It returns all top-level elements,
76/// including any leading record (which appears as a bare pattern with properties but
77/// no identity, labels, or elements).
78///
79/// # Arguments
80///
81/// * `input` - Gram notation text to parse
82///
83/// # Returns
84///
85/// * `Ok(Vec<Pattern<Subject>>)` - Successfully parsed patterns
86/// * `Err(ParseError)` - Parse error with location information
87pub fn parse_gram(input: &str) -> Result<Vec<Pattern<Subject>>, ParseError> {
88    // Handle empty/whitespace-only input
89    if input.trim().is_empty() {
90        return Ok(vec![]);
91    }
92
93    // Parse using nom parser
94    match parser::gram_patterns(input) {
95        Ok((remaining, patterns)) => {
96            // Check if all input was consumed
97            if !remaining.trim().is_empty() {
98                let offset = input.len() - remaining.len();
99                let location = parser::Location::from_offset(input, offset);
100                return Err(ParseError::UnexpectedInput {
101                    location,
102                    snippet: remaining.chars().take(20).collect(),
103                });
104            }
105            Ok(patterns)
106        }
107        Err(e) => Err(parser::ParseError::from_nom_error(input, e)),
108    }
109}
110
111/// Parse gram notation, separating an optional header record from the patterns.
112///
113/// If the first element is a bare record (identity and labels are empty, and it has no elements),
114/// it is returned separately as the header.
115///
116/// # Arguments
117///
118/// * `input` - Gram notation text to parse
119///
120/// # Returns
121///
122/// * `Ok((Option<Record>, Vec<Pattern<Subject>>))` - Successfully parsed header and patterns
123/// * `Err(ParseError)` - If parsing fails
124pub fn parse_gram_with_header(
125    input: &str,
126) -> Result<(Option<Record>, Vec<Pattern<Subject>>), ParseError> {
127    let mut patterns = parse_gram(input)?;
128
129    if patterns.is_empty() {
130        return Ok((None, vec![]));
131    }
132
133    // Check if the first pattern is a bare record
134    let first = &patterns[0];
135    if first.value.identity.0.is_empty()
136        && first.value.labels.is_empty()
137        && first.elements.is_empty()
138        && !first.value.properties.is_empty()
139    {
140        let header_record = patterns.remove(0).value.properties;
141        Ok((Some(header_record), patterns))
142    } else {
143        Ok((None, patterns))
144    }
145}
146
147/// Parse gram notation to AST (Abstract Syntax Tree).
148///
149/// Returns a single AstPattern representing the file-level pattern.
150/// This is the **recommended output format** for cross-language consumption.
151///
152/// # Why AST?
153///
154/// - **Language-agnostic**: Pure JSON, works everywhere.
155/// - **Complete**: No information loss.
156/// - **Simple**: Just patterns and subjects (no graph concepts).
157///
158/// # Arguments
159///
160/// * `input` - Gram notation text to parse
161///
162/// # Returns
163///
164/// * `Ok(AstPattern)` - The parsed pattern as AST
165/// * `Err(ParseError)` - If parsing fails
166pub fn parse_to_ast(input: &str) -> Result<AstPattern, ParseError> {
167    let patterns = parse_gram(input)?;
168
169    if patterns.is_empty() {
170        return Ok(AstPattern::empty());
171    }
172
173    // Maintain "single file-level pattern" contract for AST
174    // If there's exactly one pattern and it's not a bare record, return it.
175    // Otherwise, wrap everything in a file-level pattern.
176    let document_pattern = wrap_as_document(patterns);
177    Ok(AstPattern::from_pattern(&document_pattern))
178}
179
180/// Internal helper to wrap multiple patterns into a single document-level pattern.
181fn wrap_as_document(mut patterns: Vec<Pattern<Subject>>) -> Pattern<Subject> {
182    if patterns.len() == 1 {
183        let first = &patterns[0];
184        // If it's a "real" pattern (has identity or labels or elements), return it.
185        // Also return it if it has properties but no other fields (a bare record),
186        // because as a single pattern it represents the whole document.
187        if !first.value.identity.0.is_empty()
188            || !first.value.labels.is_empty()
189            || !first.elements.is_empty()
190            || !first.value.properties.is_empty()
191        {
192            return patterns.remove(0);
193        }
194    }
195
196    // Otherwise wrap everything (including the bare record if present)
197    // Actually, if the first is a bare record, it becomes the document's properties
198    let mut properties = Record::new();
199    if !patterns.is_empty() {
200        let first = &patterns[0];
201        if first.value.identity.0.is_empty()
202            && first.value.labels.is_empty()
203            && first.elements.is_empty()
204            && !first.value.properties.is_empty()
205        {
206            properties = patterns.remove(0).value.properties;
207        }
208    }
209
210    let subject = Subject {
211        identity: pattern_core::Symbol(String::new()),
212        labels: std::collections::HashSet::new(),
213        properties,
214    };
215    Pattern::pattern(subject, patterns)
216}
217
218/// Validate gram notation syntax without constructing patterns.
219pub fn validate_gram(input: &str) -> Result<(), ParseError> {
220    parse_gram(input).map(|_| ())
221}
222
223/// Parse a single Gram pattern from text.
224pub fn parse_single_pattern(input: &str) -> Result<Pattern<Subject>, ParseError> {
225    let patterns = parse_gram(input)?;
226
227    match patterns.len() {
228        0 => Err(ParseError::UnexpectedInput {
229            location: parser::Location::start(),
230            snippet: "Input contains no patterns".to_string(),
231        }),
232        1 => Ok(patterns.into_iter().next().unwrap()),
233        n => Err(ParseError::UnexpectedInput {
234            location: parser::Location::start(),
235            snippet: format!("Input contains {} patterns, expected exactly 1", n),
236        }),
237    }
238}
239
240// Backward compatibility aliases
241pub use parse_gram as parse_gram_notation;