parser_pda/lib.rs
1//!
//! This crate demonstrates how to use the [fi_night] crate to implement
//! a simple parser capable of indexing nested bracketed segments of text.
//! The parser distinguishes sentences separated by '.' and ```[bracketed]``` text
//! segments.
//! The automaton implementation can be found in the modules
//! parser_pda.rs and parser_nondet_pda.rs, which implement two
//! versions providing the same functionality.
9//!
10//! Here is an example:
11//!```rust
12//! use parser_pda::{create_parser_pda_instance} ;
13//! use parser_pda::parser_pda::{LittleParser, LittleParserTrait, LittleParserStates::*, LittleParserSignals::*,
14//! LITTLE_PARSER_GEN_CODE};
15//!
16//! fn main() {
17//! let mut parser = create_parser_pda_instance();
18//!
19//! fsm_code_to_file("parser_automaton", "target/fsm", LITTLE_PARSER_GEN_CODE);
20//!
21//! let text = "Fif[teen] men on a dead man's [[chest]]. []]Yo [[ho [[ho. And [the [bottle] ]of [[[RUM]]] ";
22//!
23//! parser.start();
24//!
25//! for ch in text.chars() {
26//! match ch {
27//! '[' => parser.next(&LeftBracket),
28//! ']' => parser.next(&RightBracket),
29//! '.' => parser.next(&EndOfSentence),
30//! ch @ _ => parser.next(&Letter(ch)),
31//! }
32//! }
33//! parser.stop();
34//!
35//! display_output(&text, &mut parser);
36//!
37//! fn display_output(text: &str, parser: &mut LittleParser) {
38//! use substring::Substring;
39//! println!("input: {}\n", text);
40//! while let Some(seg) = parser.data_mut().segments.pop() {
41//! let tabbed = "\t".repeat(seg.rank);
42//!
43//! println!("{} {}[{} {}) -> {}", tabbed, seg.tp, seg.seg.0, seg.seg.1,
44//! text.substring(seg.seg.0, seg.seg.1));
45//! }
46//! }
47//! }
48//!
49//! use std::fs::{File, create_dir};
50//! use std::io::{Seek, Write};
51//!
52//! fn fsm_code_to_file(fname: &str, path: &str, gen_code: &str) {
53//!
54//! let _ = create_dir(path);
55//!
56//! File::create(&format!("{}/{}.rs", path, fname))
57//! .and_then(|mut file| {
58//! file.seek(std::io::SeekFrom::End(0))?;
59//! file.write_all(gen_code.to_string().as_bytes())?;
60//! file.flush()
61//! });
62//! }
63//! ```
64//!
//! Running this example writes the generated Rust code of the automaton to
//! `target/fsm/parser_automaton.rs` (relative to the working directory)
//! and prints the following output:
68//! ```text
69//! input: Fif[teen] men on a dead man's [[chest]]. []]Yo [[ho [[ho. And [the [bottle] ]of [[[RUM]]]
70//!
71//! Sentence[0 39) -> Fif[teen] men on a dead man's [[chest]]
72//! Plain[0 30) -> Fif[teen] men on a dead man's
73//! Plain[0 3) -> Fif
74//! Bracketed[4 8) -> teen
75//! Bracketed[31 38) -> [chest]
76//! Bracketed[32 37) -> chest
77//! InvalidSentence[40 56) -> []]Yo [[ho [[ho
78//! Plain[40 43) -> []
79//! Plain[40 41) ->
80//! Bracketed[42 42) ->
81//! UnbalancedRightSth[43 56) -> ]Yo [[ho [[ho
82//! Tail[57 90) -> And [the [bottle] ]of [[[RUM]]]
83//! Plain[57 80) -> And [the [bottle] ]of
84//! Plain[57 62) -> And
85//! Bracketed[63 76) -> the [bottle]
86//! Bracketed[68 74) -> bottle
87//! Bracketed[81 88) -> [[RUM]]
88//! Bracketed[82 87) -> [RUM]
89//! Bracketed[83 86) -> RUM
90//!```
91#[macro_use]
92extern crate fi_night;
93
94pub mod defs;
95pub mod parser_pda;
96pub mod parser_nondet_pda;
97
98use crate::defs::{ ParserCtx, fsm_code_to_file };
99
100pub fn create_parser_pda_instance() -> crate::parser_pda::LittleParser {
101 crate::parser_pda::LittleParser::new(
102 crate::parser_pda::LittleParserStates::Idle,
103 ParserCtx {
104 segments: std::collections::BinaryHeap::new(),
105 index: 0,
106 })
107}
108pub fn create_parser_nondet_pda_instance() -> crate::parser_nondet_pda::ParserNonDetPDA {
109 crate::parser_nondet_pda::ParserNonDetPDA::new(
110 crate::parser_nondet_pda::ParserNonDetPDAStates::Idle,
111 ParserCtx {
112 segments: std::collections::BinaryHeap::new(),
113 index: 0,
114 })
115}
116
117pub fn parser_pda_test() {
118 use crate::parser_pda::{LittleParser, LittleParserTrait, LittleParserStates::*, LittleParserSignals::*,
119 LITTLE_PARSER_GEN_CODE};
120
121 crate::fsm_code_to_file("parser_pda", "target/fsm", LITTLE_PARSER_GEN_CODE);
122
123 let mut parser = crate::parser_pda::LittleParser::new(
124 Idle,
125 ParserCtx {
126 segments: std::collections::BinaryHeap::new(),
127 index: 0,
128 });
129
130 let text = "Fif[teen] men on a dead man's [[chest]]. []]Yo [[ho [[ho. And [the [bottle] ]of [[[RUM]]] ";
131
132 parser.start();
133
134 for ch in text.chars() {
135 match ch {
136 '[' => parser.next(&LeftBracket),
137 ']' => parser.next(&RightBracket),
138 '.' => parser.next(&EndOfSentence),
139 ch @ _ => parser.next(&Letter(ch)),
140 }
141 }
142 parser.stop();
143
144 display_output(&text, &mut parser);
145
146 fn display_output(text: &str, parser: &mut LittleParser) {
147 use substring::Substring;
148 println!("input: {}\n", text);
149 while let Some(seg) = parser.data_mut().segments.pop() {
150 let tabbed = "\t".repeat(seg.rank);
151
152 println!("{} {}[{} {}) -> {}", tabbed, seg.tp, seg.seg.0, seg.seg.1,
153 text.substring(seg.seg.0, seg.seg.1));
154 }
155 }
156}
157
158pub fn parser_nondet_pda_test() {
159 use crate::parser_nondet_pda::{ParserNonDetPDA, ParserNonDetPDATrait, ParserNonDetPDAStates::*, ParserNonDetPDASignals::*,
160 PARSER_NON_DET_PDA_GEN_CODE};
161
162 crate::fsm_code_to_file("parser_nondet_pda", "target/fsm", PARSER_NON_DET_PDA_GEN_CODE);
163 let mut parser = crate::parser_nondet_pda::ParserNonDetPDA::new(
164 Idle,
165 ParserCtx {
166 segments: std::collections::BinaryHeap::new(),
167 index: 0,
168 });
169
170 let text = "Fif[teen] men on a dead man's [[chest]]. []]Yo [[ho [[ho. And [the [bottle] ]of [[[RUM]]] ";
171
172 parser.start();
173
174 for ch in text.chars() {
175 match ch {
176 '[' => parser.next(&LeftBracket),
177 ']' => parser.next(&RightBracket),
178 '.' => parser.next(&EndOfSentence),
179 ch @ _ => parser.next(&Letter(ch)),
180 }
181 }
182 parser.stop();
183
184 display_output(&text, &mut parser);
185
186 fn display_output(text: &str, parser: &mut ParserNonDetPDA) {
187 use substring::Substring;
188 println!("input: {}\n", text);
189 while let Some(seg) = parser.data_mut().segments.pop() {
190 let tabbed = "\t".repeat(seg.rank);
191
192 println!("{} {}[{} {}) -> {}", tabbed, seg.tp, seg.seg.0, seg.seg.1,
193 text.substring(seg.seg.0, seg.seg.1));
194 }
195 }
196}