json_parser_with_pest/parser.rs
1use anyhow::{Error, Result};
2use log::{error, info};
3use pest::Parser;
4use pest_derive::Parser;
5use serde_json::{Map, Value};
6use std::fs;
7use std::path::Path;
8use std::str::FromStr;
9use thiserror::Error;
10
11
/// JSON parser generated by `pest_derive` from the grammar in `json.pest`.
///
/// The derive also generates the `Rule` enum whose variants name the grammar
/// rules. The rule descriptions below summarise the grammar file; consult
/// `json.pest` itself for the authoritative definitions.
///
/// # Grammar Documentation
///
/// ## WHITESPACE
/// Matches spaces, tabs, and newlines. Used to ignore non-significant whitespace.
///
/// - Example:
///   - Input: `" \n\t"`
///   - Match: `WHITESPACE`
///
/// ## json
/// Entry point for JSON. Matches the entire JSON document.
///
/// - Example:
///   - Input: `{ "key": "value" }`
///   - Match: Valid JSON structure.
///
/// ## value
/// Matches any valid JSON value, including:
/// - Objects: `{ "key": "value" }`
/// - Arrays: `[1, 2, 3]`
/// - Strings: `"string"`
/// - Numbers: `123.45`
/// - Booleans: `true`, `false`
/// - Null: `null`
///
/// ## object
/// Matches a JSON object, which contains key-value pairs.
///
/// - Example:
///   - Input: `{ "key": "value", "key2": "value2" }`
///   - Match: Valid JSON object.
///
/// ## array
/// Matches a JSON array, which contains a list of values.
///
/// - Example:
///   - Input: `[1, "two", null, false]`
///   - Match: Valid JSON array.
///
/// ## string
/// Matches a JSON string, supporting escape sequences and Unicode.
///
/// - Example:
///   - Input: `"Hello World"`
///   - Match: Valid JSON string.
///
/// ## number
/// Matches a JSON number, including integers and floating-point numbers.
///
/// - Example:
///   - Input: `123`, `-0.45`, `1e10`
///   - Match: Valid JSON number.
///
/// ## boolean
/// Matches `true` or `false`.
///
/// - Example:
///   - Input: `true`
///   - Match: Boolean true value.
///
/// ## null
/// Matches the literal `null`.
///
/// - Example:
///   - Input: `null`
///   - Match: JSON null value.
///
/// ## date
/// Matches dates in the format `YYYY-MM-DD`.
///
/// - Example:
///   - Input: `2023-11-15`
///   - Match: Valid date string.
///
/// ## identifier
/// Matches a valid identifier starting with a letter, followed by letters, digits, or `_`.
///
/// - Example:
///   - Input: `key_name`
///   - Match: Valid identifier.
///
/// ## version
/// Matches a semantic version number in `SemVer` format.
///
/// - Example:
///   - Input: `1.0.0`, `2.1.3-alpha`
///   - Match: Valid version string.
///
/// ## key_value_array
/// Matches an array of objects where each object is a key-value pair.
///
/// - Example:
///   - Input: `[ { "key1": "value1" }, { "key2": "value2" } ]`
///   - Match: Valid JSON key-value array.
#[derive(Parser)]
#[grammar = "json.pest"]
pub struct JSONParser;
114
/// Custom error type for JSON parsing, schema validation, and file-related errors.
///
/// `Display` and `std::error::Error` are derived through `thiserror`; the
/// `#[error(...)]` attribute on each variant is its display message.
#[derive(Error, Debug)]
pub enum ParserError {
    /// An I/O operation failed. Wraps the underlying `std::io::Error`; the
    /// `#[from]` attribute lets `?` convert an `io::Error` into this variant.
    #[error("File read error: {0}")]
    FileReadError(#[from] std::io::Error),
    /// The input could not be parsed as JSON. Carries no detail about where
    /// or why parsing failed.
    #[error("JSON parse error")]
    JsonParseError,
    /// A JSON value did not satisfy the schema it was checked against
    /// (returned by `validate_json_schema`).
    #[error("Schema validation failed")]
    SchemaValidationError,
}
125
126/// Parses a JSON string using the `JSONParser` and converts it to a `serde_json::Value`.
127/// Returns a `Result` with `Value` on success or `ParserError` on failure.
128///
129/// # Arguments
130///
131/// * `json_str` - The JSON string to be parsed.
132///
133/// # Returns
134///
135/// * `Result<Value, ParserError>` - The parsed JSON as a `serde_json::Value` if successful, or an error on failure.
136pub fn parse_json(json_str: &str) -> Result<Value, ParserError> {
137 let pairs = JSONParser::parse(Rule::json, json_str).map_err(|e| {
138 println!("Parsing error in JSON input: {:?}", e);
139 ParserError::JsonParseError
140 })?;
141 parse_value(pairs)
142}
143
144/// Recursively processes `pest` parsing results and converts them to `serde_json::Value`.
145///
146/// # Arguments
147///
148/// * `pairs` - The parsed pairs of tokens from `pest`.
149///
150/// # Returns
151///
152/// * `Result<Value, ParserError>` - A `serde_json::Value` representing the parsed JSON structure, or an error if parsing fails.
153fn parse_value(mut pairs: pest::iterators::Pairs<Rule>) -> Result<Value, ParserError> {
154 let pair = pairs.next().ok_or_else(|| {
155 println!("No pairs found in input.");
156 ParserError::JsonParseError
157 })?;
158
159 match pair.as_rule() {
160 Rule::json => parse_value(pair.into_inner()),
161 Rule::object => parse_object(pair),
162 Rule::array => parse_array(pair),
163 Rule::string => Ok(Value::String(parse_string(pair)?)),
164 Rule::number => Ok(Value::Number(parse_number(pair)?)),
165 Rule::boolean => Ok(Value::Bool(pair.as_str() == "true")),
166 Rule::null => Ok(Value::Null),
167 _ => {
168 println!("Unexpected pair encountered: {:?}", pair.as_rule());
169 Err(ParserError::JsonParseError)
170 }
171 }
172}
173
174/// Parses a JSON object and returns it as a `serde_json::Value`.
175/// Handles JSON objects by parsing each key-value pair.
176///
177/// # Arguments
178///
179/// * `pair` - The `pest::iterators::Pair` containing the JSON object.
180///
181/// # Returns
182///
183/// * `Result<Value, ParserError>` - Returns a `serde_json::Value::Object` on success, or an error if parsing fails.
184fn parse_object(pair: pest::iterators::Pair<Rule>) -> Result<Value, ParserError> {
185 let mut map = Map::new();
186 for inner_pair in pair.into_inner() {
187 if inner_pair.as_rule() == Rule::pair {
188 let mut inner_rules = inner_pair.into_inner();
189 let key = parse_string(inner_rules.next().ok_or(ParserError::JsonParseError)?)?;
190 let value = parse_value(inner_rules)?;
191 map.insert(key, value);
192 }
193 }
194 Ok(Value::Object(map))
195}
196
197/// Parses a JSON array and returns it as a `serde_json::Value`.
198/// Processes each array element and collects them into a `Vec<Value>`.
199///
200/// # Arguments
201///
202/// * `pair` - The `pest::iterators::Pair` containing the JSON array.
203///
204/// # Returns
205///
206/// * `Result<Value, ParserError>` - Returns a `serde_json::Value::Array` on success, or an error if parsing fails.
207fn parse_array(pair: pest::iterators::Pair<Rule>) -> Result<Value, ParserError> {
208 let mut array = Vec::new();
209 for inner_pair in pair.into_inner() {
210 let value = parse_value(inner_pair.into_inner())?;
211 array.push(value);
212 }
213 Ok(Value::Array(array))
214}
215
216/// Parses a JSON string, handling escape sequences and Unicode characters.
217///
218/// # Arguments
219///
220/// * `pair` - The `pest::iterators::Pair` containing the JSON string.
221///
222/// # Returns
223///
224/// * `Result<String, ParserError>` - The parsed string or an error if parsing fails.
225fn parse_string(pair: pest::iterators::Pair<Rule>) -> Result<String, ParserError> {
226 let mut result = String::new();
227 for inner_pair in pair.into_inner() {
228 match inner_pair.as_rule() {
229 Rule::character => result.push_str(inner_pair.as_str()),
230 Rule::escape_sequence => {
231 let escaped = match inner_pair.as_str() {
232 "\\\"" => "\"",
233 "\\\\" => "\\",
234 "\\/" => "/",
235 "\\b" => "\u{0008}",
236 "\\f" => "\u{000C}",
237 "\\n" => "\n",
238 "\\r" => "\r",
239 "\\t" => "\t",
240 escape if escape.starts_with("\\u") => {
241 let hex = &escape[2..];
242 let code_point = u32::from_str_radix(hex, 16)
243 .map_err(|_| ParserError::JsonParseError)?;
244 let unicode_char = std::char::from_u32(code_point)
245 .ok_or(ParserError::JsonParseError)?
246 .to_string();
247 result.push_str(&unicode_char);
248 continue;
249 }
250 _ => return Err(ParserError::JsonParseError),
251 };
252 result.push_str(escaped);
253 }
254 _ => {}
255 }
256 }
257 Ok(result)
258}
259
260/// Parses a JSON number and converts it to a `serde_json::Number`.
261///
262/// # Arguments
263///
264/// * `pair` - The `pest::iterators::Pair` containing the JSON number.
265///
266/// # Returns
267///
268/// * `Result<serde_json::Number, ParserError>` - The parsed number or an error if parsing fails.
269fn parse_number(pair: pest::iterators::Pair<Rule>) -> Result<serde_json::Number, ParserError> {
270 let number_str = pair.as_str();
271 serde_json::Number::from_str(number_str).map_err(|_| ParserError::JsonParseError)
272}
273
274/// Validates a JSON object against a schema.
275/// Checks that all keys in the schema are present in the JSON object.
276///
277/// # Arguments
278///
279/// * `json` - The JSON object to validate.
280/// * `schema` - The schema to validate against.
281///
282/// # Returns
283///
284/// * `Result<(), ParserError>` - Returns Ok if validation is successful, or an error if validation fails.
285pub fn validate_json_schema(json: &Value, schema: &Value) -> Result<(), ParserError> {
286 if json.is_object() && schema.is_object() {
287 if json
288 .as_object()
289 .unwrap()
290 .keys()
291 .all(|key| schema.as_object().unwrap().contains_key(key))
292 {
293 Ok(())
294 } else {
295 Err(ParserError::SchemaValidationError)
296 }
297 } else {
298 Err(ParserError::SchemaValidationError)
299 }
300}
301
302/// Parses a specific part of the JSON file by a given key.
303/// Returns `Some(Value)` if the key exists, otherwise `None`.
304///
305/// # Arguments
306///
307/// * `json` - The JSON object to parse.
308/// * `key` - The key to extract from the JSON.
309///
310/// # Returns
311///
312/// * `Option<Value>` - The extracted value or `None` if the key is not found.
313pub fn parse_partial_json(json: &Value, key: &str) -> Option<Value> {
314 json.get(key).cloned()
315}
316
317/// Edits a JSON file by updating a specific key with a new value.
318///
319/// # Arguments
320///
321/// * `json` - A mutable reference to the JSON object to edit.
322/// * `key` - The key in the JSON object to update.
323/// * `new_value` - The new value to set for the specified key.
324///
325/// # Returns
326///
327/// * `Result<(), Error>` - Returns Ok if successful, or an error if the JSON structure is invalid.
328pub fn edit_json(json: &mut Value, key: &str, new_value: Value) -> Result<(), Error> {
329 if let Some(obj) = json.as_object_mut() {
330 obj.insert(key.to_string(), new_value);
331 Ok(())
332 } else {
333 Err(Error::msg("Invalid JSON structure for editing"))
334 }
335}
336
337/// Converts JSON to YAML or XML format based on the specified format.
338///
339/// # Arguments
340///
341/// * `json` - The JSON object to convert.
342/// * `format` - The target format ("yaml" or "xml").
343///
344/// # Returns
345///
346/// * `Result<String, Error>` - The converted JSON in the specified format, or an error if the format is unsupported.
347pub fn convert_to_format(json: &Value, format: &str) -> Result<String, Error> {
348 match format {
349 "yaml" => serde_yaml::to_string(json).map_err(|e| Error::msg(e.to_string())),
350 "xml" => convert_json_to_xml(json),
351 _ => Err(Error::msg("Unsupported format")),
352 }
353}
354
355/// Converts JSON to XML format.
356///
357/// # Arguments
358///
359/// * `json` - The JSON object to convert.
360///
361/// # Returns
362///
363/// * `Result<String, Error>` - The converted JSON in XML format, or an error if conversion fails.
364fn convert_json_to_xml(json: &Value) -> Result<String, Error> {
365 let mut writer = Vec::new();
366 write_xml(json, &mut writer, "root")?;
367 String::from_utf8(writer).map_err(|e| Error::msg(e.to_string()))
368}
369
370/// Writes XML data recursively from JSON, preserving the structure.
371///
372/// # Arguments
373///
374/// * `json` - The JSON object to write as XML.
375/// * `writer` - The writer to output the XML data.
376/// * `tag_name` - The XML tag name.
377///
378/// # Returns
379///
380/// * `Result<(), Error>` - Returns Ok if writing succeeds, or an error if it fails.
381fn write_xml<W: std::io::Write>(json: &Value, writer: &mut W, tag_name: &str) -> Result<(), Error> {
382 match json {
383 Value::Object(map) => {
384 writeln!(writer, "<{}>", tag_name)?;
385 for (key, value) in map {
386 write_xml(value, writer, key)?;
387 }
388 writeln!(writer, "</{}>", tag_name)?;
389 }
390 Value::Array(arr) => {
391 for value in arr {
392 write_xml(value, writer, tag_name)?;
393 }
394 }
395 Value::String(s) => {
396 writeln!(writer, "<{0}>{1}</{0}>", tag_name, s)?;
397 }
398 Value::Number(num) => {
399 writeln!(writer, "<{0}>{1}</{0}>", tag_name, num)?;
400 }
401 Value::Bool(b) => {
402 writeln!(writer, "<{0}>{1}</{0}>", tag_name, b)?;
403 }
404 Value::Null => {
405 writeln!(writer, "<{} />", tag_name)?;
406 }
407 }
408 Ok(())
409}
410
411/// Processes large JSON files by parsing them in chunks.
412///
413/// # Arguments
414///
415/// * `file_path` - The path to the large JSON file.
416///
417/// # Returns
418///
419/// * `Result<(), ParserError>` - Returns Ok if successful, or an error if parsing fails.
420pub fn handle_large_json(file_path: &Path) -> Result<(), ParserError> {
421 let file = fs::File::open(file_path)?;
422 let stream = serde_json::Deserializer::from_reader(file).into_iter::<Value>();
423
424 for value in stream {
425 match value {
426 Ok(json_value) => info!("Parsed chunk: {:?}", json_value),
427 Err(e) => error!("Error parsing chunk: {:?}", e),
428 }
429 }
430 Ok(())
431}
432
433/// Searches for JSON keys by a specific value, returning paths where the value is found.
434///
435/// # Arguments
436///
437/// * `json` - The JSON object to search.
438/// * `target_value` - The target value to search for.
439///
440/// # Returns
441///
442/// * `Vec<String>` - A list of paths where the target value is found.
443pub fn search_by_value(json: &Value, target_value: &str) -> Vec<String> {
444 let mut results = Vec::new();
445 search_recursive(json, target_value, &mut results, "".to_string());
446 results
447}
448
449/// Recursive helper function for `search_by_value`, traversing JSON structure.
450///
451/// # Arguments
452///
453/// * `json` - The JSON object to search.
454/// * `target_value` - The target value to search for.
455/// * `results` - A mutable vector to store the found paths.
456/// * `path` - The current JSON path.
457fn search_recursive(json: &Value, target_value: &str, results: &mut Vec<String>, path: String) {
458 match json {
459 Value::Object(map) => {
460 for (key, value) in map {
461 let new_path = format!("{}.{}", path, key)
462 .trim_start_matches('.')
463 .to_string();
464 if value.is_string() && value.as_str().unwrap() == target_value {
465 results.push(new_path.clone());
466 }
467 search_recursive(value, target_value, results, new_path);
468 }
469 }
470 Value::Array(arr) => {
471 for (index, item) in arr.iter().enumerate() {
472 let new_path = format!("{}[{}]", path, index);
473 search_recursive(item, target_value, results, new_path);
474 }
475 }
476 _ => {}
477 }
478}
479
480/// Retrieves a JSON value by a given path (e.g., "data.items[0].name").
481///
482/// # Arguments
483///
484/// * `json` - The JSON object to search.
485/// * `json_path` - The path to the target value.
486///
487/// # Returns
488///
489/// * `Option<Value>` - The found value or `None` if the path does not exist.
490pub fn get_by_path(json: &Value, json_path: &str) -> Option<Value> {
491 let mut current = json;
492 let parts = json_path.split('.');
493
494 for part in parts {
495 if part.contains('[') && part.contains(']') {
496 let name = &part[..part.find('[').unwrap()];
497 let index: usize = part[part.find('[').unwrap() + 1..part.find(']').unwrap()]
498 .parse()
499 .ok()?;
500 current = current.get(name)?.get(index)?;
501 } else {
502 current = current.get(part)?;
503 }
504 }
505 Some(current.clone())
506}
507
508/// Minifies JSON by removing whitespace.
509///
510/// # Arguments
511///
512/// * `json` - The JSON object to minify.
513///
514/// # Returns
515///
516/// * `String` - The minified JSON string.
517pub fn minify_json(json: &Value) -> String {
518 json.to_string()
519}
520
521/// Displays the structure of JSON, printing each key and nested value with indentation.
522///
523/// # Arguments
524///
525/// * `json` - The JSON object to display.
526pub fn display_structure(json: &Value) {
527 display_structure_recursive(json, 0);
528}
529
530/// Helper function for `display_structure` to recursively print JSON structure with indentation.
531///
532/// # Arguments
533///
534/// * `json` - The JSON object to display.
535/// * `indent` - The current indentation level.
536fn display_structure_recursive(json: &Value, indent: usize) {
537 match json {
538 Value::Object(map) => {
539 for (key, value) in map {
540 println!("{:indent$}{}", "", key, indent = indent);
541 display_structure_recursive(value, indent + 2);
542 }
543 }
544 Value::Array(arr) => {
545 for (index, item) in arr.iter().enumerate() {
546 println!("{:indent$}[{}]", "", index, indent = indent);
547 display_structure_recursive(item, indent + 2);
548 }
549 }
550 _ => {}
551 }
552}