Skip to main content

gram_codec/
lib.rs

1//! # Gram Codec
2//!
3//! Bidirectional codec between Gram notation (human-readable text format) and Pattern data structures.
4//!
5//! This crate provides:
6//! - **Parsing**: Transform Gram notation text into Pattern structures
7//! - **Serialization**: Transform Pattern structures into valid Gram notation
8//!
9//! ## Features
10//!
11//! - Full support for all Gram syntax forms (nodes, relationships, subject patterns, annotations)
12//! - Round-trip correctness (parse → serialize → parse produces equivalent pattern)
13//! - Error recovery (reports all syntax errors, not just the first)
14//! - Multi-platform support (native Rust, WebAssembly, Python)
15//!
16//! ## Example Usage
17//!
18//! ```rust,no_run
19//! use gram_codec::{parse_gram_notation, to_gram_pattern};
20//!
21//! // Parse gram notation into patterns
22//! let gram_text = "(alice:Person {name: \"Alice\"})-[:KNOWS]->(bob:Person {name: \"Bob\"})";
23//! let patterns = parse_gram_notation(gram_text)?;
24//!
25//! // Serialize patterns back to gram notation
26//! for pattern in &patterns {
27//!     let output = to_gram_pattern(pattern)?;
28//!     println!("{}", output);
29//! }
30//! # Ok::<(), Box<dyn std::error::Error>>(())
31//! ```
32//!
33//! ## Grammar Authority
34//!
35//! This codec uses [`tree-sitter-gram`](https://github.com/gram-data/tree-sitter-gram) as the
36//! authoritative grammar specification. The parser implementation is pure Rust using nom parser
37//! combinators, validated for 100% conformance with the tree-sitter-gram test corpus.
38
39// Module declarations
40pub mod ast;
41mod error;
42mod serializer;
43pub mod standard_graph;
44mod value;
45
46// TODO: Temporarily commented out during migration to nom parser
47// Old tree-sitter parser (will be replaced)
48// mod parser;
49// pub(crate) mod transform;
50
51// New nom-based parser module (under development)
52mod parser;
53
54// Optional platform-specific modules
55#[cfg(feature = "wasm")]
56mod wasm;
57
58#[cfg(feature = "python")]
59mod python;
60
61// Public API exports
62pub use ast::{AstPattern, AstSubject};
63pub use error::{Location, SerializeError};
64// Use the new nom-based ParseError from the parser module
65pub use parser::ParseError;
66pub use serializer::{to_gram, to_gram_pattern, to_gram_with_header};
67pub use value::Value;
68
69// Re-export Pattern and Subject from pattern-core for convenience
70pub use pattern_core::{Pattern, PropertyRecord as Record, Subject};
71
72// --- New nom-based parser API ---
73
74/// Parse gram notation text into a collection of Pattern structures.
75///
76/// This is the foundational parser for gram notation. It returns all top-level elements,
77/// including any leading record (which appears as a bare pattern with properties but
78/// no identity, labels, or elements).
79///
80/// # Arguments
81///
82/// * `input` - Gram notation text to parse
83///
84/// # Returns
85///
86/// * `Ok(Vec<Pattern<Subject>>)` - Successfully parsed patterns
87/// * `Err(ParseError)` - Parse error with location information
88pub fn parse_gram(input: &str) -> Result<Vec<Pattern<Subject>>, ParseError> {
89    // Handle empty/whitespace-only input
90    if input.trim().is_empty() {
91        return Ok(vec![]);
92    }
93
94    // Parse using nom parser
95    match parser::gram_patterns(input) {
96        Ok((remaining, patterns)) => {
97            // Check if all input was consumed
98            if !remaining.trim().is_empty() {
99                let offset = input.len() - remaining.len();
100                let location = parser::Location::from_offset(input, offset);
101                return Err(ParseError::UnexpectedInput {
102                    location,
103                    snippet: remaining.chars().take(20).collect(),
104                });
105            }
106            Ok(patterns)
107        }
108        Err(e) => Err(parser::ParseError::from_nom_error(input, e)),
109    }
110}
111
112/// Parse gram notation, separating an optional header record from the patterns.
113///
114/// If the first element is a bare record (identity and labels are empty, and it has no elements),
115/// it is returned separately as the header.
116///
117/// # Arguments
118///
119/// * `input` - Gram notation text to parse
120///
121/// # Returns
122///
123/// * `Ok((Option<Record>, Vec<Pattern<Subject>>))` - Successfully parsed header and patterns
124/// * `Err(ParseError)` - If parsing fails
125pub fn parse_gram_with_header(
126    input: &str,
127) -> Result<(Option<Record>, Vec<Pattern<Subject>>), ParseError> {
128    let mut patterns = parse_gram(input)?;
129
130    if patterns.is_empty() {
131        return Ok((None, vec![]));
132    }
133
134    // Check if the first pattern is a bare record
135    let first = &patterns[0];
136    if first.value.identity.0.is_empty()
137        && first.value.labels.is_empty()
138        && first.elements.is_empty()
139        && !first.value.properties.is_empty()
140    {
141        let header_record = patterns.remove(0).value.properties;
142        Ok((Some(header_record), patterns))
143    } else {
144        Ok((None, patterns))
145    }
146}
147
148/// Parse gram notation to AST (Abstract Syntax Tree).
149///
150/// Returns a single AstPattern representing the file-level pattern.
151/// This is the **recommended output format** for cross-language consumption.
152///
153/// # Why AST?
154///
155/// - **Language-agnostic**: Pure JSON, works everywhere.
156/// - **Complete**: No information loss.
157/// - **Simple**: Just patterns and subjects (no graph concepts).
158///
159/// # Arguments
160///
161/// * `input` - Gram notation text to parse
162///
163/// # Returns
164///
165/// * `Ok(AstPattern)` - The parsed pattern as AST
166/// * `Err(ParseError)` - If parsing fails
167pub fn parse_to_ast(input: &str) -> Result<AstPattern, ParseError> {
168    let patterns = parse_gram(input)?;
169
170    if patterns.is_empty() {
171        return Ok(AstPattern::empty());
172    }
173
174    // Maintain "single file-level pattern" contract for AST
175    // If there's exactly one pattern and it's not a bare record, return it.
176    // Otherwise, wrap everything in a file-level pattern.
177    let document_pattern = wrap_as_document(patterns);
178    Ok(AstPattern::from_pattern(&document_pattern))
179}
180
181/// Internal helper to wrap multiple patterns into a single document-level pattern.
182fn wrap_as_document(mut patterns: Vec<Pattern<Subject>>) -> Pattern<Subject> {
183    if patterns.len() == 1 {
184        let first = &patterns[0];
185        // If it's a "real" pattern (has identity or labels or elements), return it.
186        // Also return it if it has properties but no other fields (a bare record),
187        // because as a single pattern it represents the whole document.
188        if !first.value.identity.0.is_empty()
189            || !first.value.labels.is_empty()
190            || !first.elements.is_empty()
191            || !first.value.properties.is_empty()
192        {
193            return patterns.remove(0);
194        }
195    }
196
197    // Otherwise wrap everything (including the bare record if present)
198    // Actually, if the first is a bare record, it becomes the document's properties
199    let mut properties = Record::new();
200    if !patterns.is_empty() {
201        let first = &patterns[0];
202        if first.value.identity.0.is_empty()
203            && first.value.labels.is_empty()
204            && first.elements.is_empty()
205            && !first.value.properties.is_empty()
206        {
207            properties = patterns.remove(0).value.properties;
208        }
209    }
210
211    let subject = Subject {
212        identity: pattern_core::Symbol(String::new()),
213        labels: std::collections::HashSet::new(),
214        properties,
215    };
216    Pattern::pattern(subject, patterns)
217}
218
219/// Validate gram notation syntax without constructing patterns.
220pub fn validate_gram(input: &str) -> Result<(), ParseError> {
221    parse_gram(input).map(|_| ())
222}
223
224/// Parse a single Gram pattern from text.
225pub fn parse_single_pattern(input: &str) -> Result<Pattern<Subject>, ParseError> {
226    let patterns = parse_gram(input)?;
227
228    match patterns.len() {
229        0 => Err(ParseError::UnexpectedInput {
230            location: parser::Location::start(),
231            snippet: "Input contains no patterns".to_string(),
232        }),
233        1 => Ok(patterns.into_iter().next().unwrap()),
234        n => Err(ParseError::UnexpectedInput {
235            location: parser::Location::start(),
236            snippet: format!("Input contains {} patterns, expected exactly 1", n),
237        }),
238    }
239}
240
241// Backward compatibility aliases
242pub use parse_gram as parse_gram_notation;
243pub use standard_graph::FromGram;